From 03836796b8823cdf2013363d2dc689b1853eb230 Mon Sep 17 00:00:00 2001
From: stevenhua0320
Date: Mon, 29 Jul 2024 16:42:27 +0800
Subject: [PATCH] lint check and fix Python 2 print and exception issues

---
 devutils/prep.py                     |   4 +-
 diffpy/srmise/basefunction.py        | 141 ++++---
 diffpy/srmise/dataclusters.py        | 168 +++++---
 diffpy/srmise/modelcluster.py        | 565 +++++++++++++++------------
 diffpy/srmise/modelparts.py          | 234 ++++++-----
 diffpy/srmise/multimodelselection.py | 248 +++++++-----
 diffpy/srmise/pdfdataset.py          | 175 +++++----
 7 files changed, 916 insertions(+), 619 deletions(-)

diff --git a/devutils/prep.py b/devutils/prep.py
index c63ce0f..8ecea6b 100644
--- a/devutils/prep.py
+++ b/devutils/prep.py
@@ -108,8 +108,6 @@ def rm(directory, filerestr):
 ### Convert output of example files to Unix-style endlines for sdist.
 if os.linesep != '\n':
-    print "==== Scrubbing Endlines ===="
+    print("==== Scrubbing Endlines ====")
     # All *.srmise and *.pwa files in examples directory.
     scrubeol("../doc/examples/output", r".*(\.srmise|\.pwa)")
-
-
diff --git a/diffpy/srmise/basefunction.py b/diffpy/srmise/basefunction.py
index 18a55e4..e9af1ec 100644
--- a/diffpy/srmise/basefunction.py
+++ b/diffpy/srmise/basefunction.py
@@ -17,12 +17,14 @@
 import sys

 import numpy as np
+from numpy.compat import unicode

 from diffpy.srmise.modelparts import ModelPart, ModelParts
 from diffpy.srmise.srmiseerrors import *

 logger = logging.getLogger("diffpy.srmise")

+
 class BaseFunction(object):
     """Base class for mathematical functions which model numeric sequences.

@@ -61,7 +63,15 @@ class BaseFunction(object):
     transform_parameters()
     """

-    def __init__(self, parameterdict, parformats, default_formats, metadict, base=None, Cache=None):
+    def __init__(
+        self,
+        parameterdict,
+        parformats,
+        default_formats,
+        metadict,
+        base=None,
+        Cache=None,
+    ):
         """Set parameterdict defined by subclass

         Parameters
@@ -96,23 +106,31 @@ def __init__(self, parameterdict, parformats, default_formats, metadict, base=No
         vals = self.parameterdict.values()
         vals.sort()
         if vals != range(self.npars):
-            emsg = "Argument parameterdict's values must uniquely specify "+\
-                   "the index of each parameter defined by its keys."
+            emsg = (
+                "Argument parameterdict's values must uniquely specify "
+                + "the index of each parameter defined by its keys."
+            )
             raise ValueError(emsg)

         self.parformats = parformats

         # Check validity of default_formats
         self.default_formats = default_formats
-        if not ("default_input" in self.default_formats and
-                "default_output" in self.default_formats):
-            emsg = "Argument default_formats must specify 'default_input' "+\
-                   "and 'default_output' as keys."
+        if not (
+            "default_input" in self.default_formats
+            and "default_output" in self.default_formats
+        ):
+            emsg = (
+                "Argument default_formats must specify 'default_input' "
+                + "and 'default_output' as keys."
+            )
             raise ValueError(emsg)
         for f in self.default_formats.values():
             if not f in self.parformats:
-                emsg = "Keys of argument default_formats must map to a "+\
-                       "value within argument parformats."
+                emsg = (
+                    "Keys of argument default_formats must map to a "
+                    + "value within argument parformats."
+                )
-                raise ValueError()
+                raise ValueError(emsg)

         # Set metadictionary
@@ -126,12 +144,11 @@ def __init__(self, parameterdict, parformats, default_formats, metadict, base=No
         # of PeakFunction.
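        # NOTE (editor, not part of the patch): the parameterdict check earlier
        # in this __init__ is Python 2 specific -- under Python 3, dict.values()
        # returns a view with no .sort() method, and comparing it to a range
        # object is always unequal.  A hedged, version-agnostic sketch of the
        # same check:
        #
        #     vals = sorted(self.parameterdict.values())
        #     if vals != list(range(self.npars)):
        #         raise ValueError(emsg)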
# Object to cache: (basefunctioninstance, tuple of parameters) if Cache is not None: - #self.value = Cache(self.value, "value") - #self.jacobian = Cache(self.jacobian, "jacobian") + # self.value = Cache(self.value, "value") + # self.jacobian = Cache(self.jacobian, "jacobian") pass return - #### "Virtual" class methods #### def actualize(self, *args, **kwds): @@ -164,7 +181,6 @@ def _valueraw(self, *args, **kwds): emsg = "_valueraw must() be implemented in a BaseFunction subclass." raise NotImplementedError(emsg) - #### Class methods #### def jacobian(self, p, r, rng=None): @@ -179,8 +195,10 @@ def jacobian(self, p, r, rng=None): previously calculated values instead. """ if self is not p._owner: - emsg = "Argument 'p' must be evaluated by the BaseFunction "+\ - "subclass which owns it." + emsg = ( + "Argument 'p' must be evaluated by the BaseFunction " + + "subclass which owns it." + ) raise ValueError(emsg) # normally r will be a sequence, but also allow single numeric values @@ -192,7 +210,7 @@ def jacobian(self, p, r, rng=None): output = [None for j in jac] for idx in range(len(output)): if jac[idx] is not None: - output[idx] = r * 0. + output[idx] = r * 0.0 output[idx][rng] = jac[idx] return output except TypeError: @@ -201,10 +219,10 @@ def jacobian(self, p, r, rng=None): def transform_derivatives(self, pars, in_format=None, out_format=None): """Return gradient matrix for pars converted from in_format to out_format. - Parameters - pars - Sequence of parameters - in_format - A format defined for this class - out_format - A format defined for this class + Parameters + pars - Sequence of parameters + in_format - A format defined for this class + out_format - A format defined for this class """ # Map unspecified formats to specific formats defined in default_formats if in_format is None: @@ -223,25 +241,29 @@ def transform_derivatives(self, pars, in_format=None, out_format=None): out_format = self.default_formats["default_input"] if not in_format in self.parformats: - raise ValueError("Argument 'in_format' must be one of %s." \ - % self.parformats) + raise ValueError( + "Argument 'in_format' must be one of %s." % self.parformats + ) if not out_format in self.parformats: - raise ValueError("Argument 'out_format' must be one of %s." \ - % self.parformats) + raise ValueError( + "Argument 'out_format' must be one of %s." % self.parformats + ) if in_format == out_format: return np.identity(self.npars) - return self._transform_derivativesraw(pars, in_format=in_format, out_format=out_format) + return self._transform_derivativesraw( + pars, in_format=in_format, out_format=out_format + ) def transform_parameters(self, pars, in_format=None, out_format=None): """Return new sequence with pars converted from in_format to out_format. - Also restores parameters to a preferred range if it permits multiple - values that correspond to the same physical result. + Also restores parameters to a preferred range if it permits multiple + values that correspond to the same physical result. 
- Parameters - pars - Sequence of parameters - in_format - A format defined for this class - out_format - A format defined for this class + Parameters + pars - Sequence of parameters + in_format - A format defined for this class + out_format - A format defined for this class """ # Map unspecified formats to specific formats defined in default_formats if in_format is None: @@ -260,15 +282,18 @@ def transform_parameters(self, pars, in_format=None, out_format=None): out_format = self.default_formats["default_input"] if not in_format in self.parformats: - raise ValueError("Argument 'in_format' must be one of %s." \ - % self.parformats) + raise ValueError( + "Argument 'in_format' must be one of %s." % self.parformats + ) if not out_format in self.parformats: - raise ValueError("Argument 'out_format' must be one of %s." \ - % self.parformats) - #if in_format == out_format: + raise ValueError( + "Argument 'out_format' must be one of %s." % self.parformats + ) + # if in_format == out_format: # return pars - return self._transform_parametersraw(pars, in_format=in_format, out_format=out_format) - + return self._transform_parametersraw( + pars, in_format=in_format, out_format=out_format + ) def value(self, p, r, rng=None): """Calculate value of ModelPart over r, possibly restricted by range. @@ -282,8 +307,10 @@ def value(self, p, r, rng=None): previously calculated values instead. """ if self is not p._owner: - emsg = "Argument 'p' must be evaluated by the BaseFunction "+\ - "subclass which owns it." + emsg = ( + "Argument 'p' must be evaluated by the BaseFunction " + + "subclass which owns it." + ) raise ValueError(emsg) # normally r will be a sequence, but also allow single numeric values @@ -291,7 +318,7 @@ def value(self, p, r, rng=None): if rng is None: rng = slice(0, len(r)) rpart = r[rng] - output = r * 0. 
+            output = r * 0.0
             output[rng] = self._valueraw(p.pars, rpart)
             return output
         except TypeError:
@@ -338,17 +365,17 @@ def writestr(self, baselist):
             raise ValueError("emsg")
         lines = []
         # Write function type
-        lines.append("function=%s" %repr(self.__class__.__name__))
-        lines.append("module=%s" %repr(self.getmodule()))
+        lines.append("function=%s" % repr(self.__class__.__name__))
+        lines.append("module=%s" % repr(self.getmodule()))
         # Write base
         if self.base is not None:
-            lines.append("base=%s" %repr(baselist.index(self.base)))
+            lines.append("base=%s" % repr(baselist.index(self.base)))
         else:
-            lines.append("base=%s" %repr(None))
+            lines.append("base=%s" % repr(None))
         # Write all other metadata
         for k, (v, f) in self.metadict.iteritems():
-            lines.append("%s=%s" %(k, f(v)))
-        datastring = "\n".join(lines)+"\n"
+            lines.append("%s=%s" % (k, f(v)))
+        datastring = "\n".join(lines) + "\n"
         return datastring

     @staticmethod
@@ -367,19 +394,19 @@ def factory(functionstr, baselist):

         # populate dictionary with parameter definition
         # "key=value"->{"key":"value"}
-        data = re.split(r'(?:[\r\n]+|\A)(\S+)=', data)
+        data = re.split(r"(?:[\r\n]+|\A)(\S+)=", data)
         ddict = {}
-        for i in range(len(data)/2):
-            ddict[data[2*i+1]] = data[2*i+2]
+        for i in range(len(data) // 2):
+            ddict[data[2 * i + 1]] = data[2 * i + 2]

         # dictionary of parameters
         pdict = {}
-        for (k, v) in ddict.items():
+        for k, v in ddict.items():
             try:
                 pdict[k] = eval(v)
-            except Exception, e:
+            except Exception as e:
                 logger.exception(e)
-                emsg = ("Invalid parameter: %s=%s" %(k,v))
+                emsg = "Invalid parameter: %s=%s" % (k, v)
                 raise SrMiseDataFormatError(emsg)

         function_name = pdict["function"]
@@ -438,9 +465,9 @@ def safefunction(f, fsafe):

     return

-#end of class BaseFunction
+# end of class BaseFunction

-if __name__ == '__main__':
+if __name__ == "__main__":

     from diffpy.srmise.peaks import GaussianOverR, TerminationRipples

@@ -451,7 +478,7 @@
     pt = TerminationRipples(p, 20)
     outstr2 = pt.writestr([p])

-    print outstr
+    print(outstr)

     pt2 = BaseFunction.factory(outstr2, [p])
-    print type(pt2)
+    print(type(pt2))

diff --git a/diffpy/srmise/dataclusters.py b/diffpy/srmise/dataclusters.py
index 077c1a8..0bf968b 100644
--- a/diffpy/srmise/dataclusters.py
+++ b/diffpy/srmise/dataclusters.py
@@ -21,6 +21,7 @@

 logger = logging.getLogger("diffpy.srmise")

+
 class DataClusters:
     """Find clusters corresponding to peaks in numerical x-, y-value arrays.

@@ -51,7 +52,7 @@ def __init__(self, x, y, res):
         y - corresponding sequence of y-values
         res - clustering 'resolution'
         """
-        #Track internal state of clustering.
+        # Track internal state of clustering.
         self.INIT = 0
         self.READY = 1
         self.CLUSTERING = 2
@@ -62,14 +63,12 @@ def __init__(self, x, y, res):

         return

-
     # This iterator operates not over found clusters, but over the process of
     # clustering. This behavior could cause confusion and should perhaps be
     # altered.
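     # NOTE (editor, not part of the patch): the class still uses the Python 2
     # iterator protocol -- __iter__ below returns self, but the step method is
     # named next() rather than __next__(), so iteration breaks under Python 3.
     # A minimal, backwards-compatible sketch (assuming the existing next()
     # body is kept unchanged):
     #
     #     def __next__(self):
     #         return self.next()  # delegate to the Python 2 style method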
def __iter__(self): return self - def clear(self): """Clear all members, including user data.""" self.x = np.array([]) @@ -85,7 +84,7 @@ def clear(self): def reset_clusters(self): """Reset all progress on clustering.""" - self.clusters = np.array([[self.data_order[-1],self.data_order[-1]]]) + self.clusters = np.array([[self.data_order[-1], self.data_order[-1]]]) self.current_idx = self.data_order.size - 1 self.lastcluster_idx = 0 self.lastpoint_idx = self.data_order[-1] @@ -102,7 +101,7 @@ def setdata(self, x, y, res): y - corresponding sequence of y-values res - clustering 'resolution' """ - #Test for error conditions + # Test for error conditions # 1) Length mismatch # 2) Bound errors for res # 3) r isn't sorted? @@ -116,7 +115,7 @@ def setdata(self, x, y, res): self.y = y self.res = res - self.data_order = self.y.argsort() # Defines order of clustering + self.data_order = self.y.argsort() # Defines order of clustering self.clusters = np.array([[self.data_order[-1], self.data_order[-1]]]) self.current_idx = len(self.data_order) - 1 self.lastcluster_idx = 0 @@ -169,9 +168,10 @@ def next(self): self.lastcluster_idx = nearest_cluster[0] else: # insert right of nearest cluster - self.lastcluster_idx = nearest_cluster[0]+1 - self.clusters = np.insert(self.clusters, self.lastcluster_idx, - [test_idx, test_idx], 0) + self.lastcluster_idx = nearest_cluster[0] + 1 + self.clusters = np.insert( + self.clusters, self.lastcluster_idx, [test_idx, test_idx], 0 + ) return self def makeclusters(self): @@ -200,10 +200,10 @@ def find_nearest_cluster2(self, x): return self.find_nearest_cluster(idx) else: # Choose adjacent index nearest to x - if (self.x[idx] - x) < (x - self.x[idx-1]): + if (self.x[idx] - x) < (x - self.x[idx - 1]): return self.find_nearest_cluster(idx) else: - return self.find_nearest_cluster(idx-1) + return self.find_nearest_cluster(idx - 1) def find_nearest_cluster(self, idx): """Return [cluster index, distance] for cluster nearest to x[idx]. 
@@ -225,23 +225,27 @@
             return None

         flat_idx = clusters_flat.searchsorted(idx)
-        near_idx = flat_idx/2
+        near_idx = flat_idx // 2

         if flat_idx == len(clusters_flat):
-            #test_idx is right of the last cluster
-            return [near_idx-1, self.x[idx]-self.x[self.clusters[-1, 1]]]
-        if clusters_flat[flat_idx] == idx or flat_idx%2 == 1:
+            # test_idx is right of the last cluster
+            return [near_idx - 1, self.x[idx] - self.x[self.clusters[-1, 1]]]
+        if clusters_flat[flat_idx] == idx or flat_idx % 2 == 1:
             # idx is within some cluster
             return [near_idx, 0.0]
         if flat_idx == 0:
             # idx is left of the first cluster
-            return [near_idx, self.x[idx]-self.x[self.clusters[0,0]]]
+            return [near_idx, self.x[idx] - self.x[self.clusters[0, 0]]]

         # Calculate which of the two nearest clusters is closer
-        distances=np.array([self.x[idx]-self.x[self.clusters[near_idx-1, 1]],
-                            self.x[idx]-self.x[self.clusters[near_idx, 0]]])
+        distances = np.array(
+            [
+                self.x[idx] - self.x[self.clusters[near_idx - 1, 1]],
+                self.x[idx] - self.x[self.clusters[near_idx, 0]],
+            ]
+        )
         if distances[0] < np.abs(distances[1]):
-            return [near_idx-1, distances[0]]
+            return [near_idx - 1, distances[0]]
         else:
             return [near_idx, distances[1]]

@@ -255,15 +259,17 @@ def cluster_is_full(self, cluster_idx):
         cluster_idx - The index of the cluster to test
         """
         if cluster_idx > 0:
-            low = self.clusters[cluster_idx-1, 1] + 1
+            low = self.clusters[cluster_idx - 1, 1] + 1
         else:
             low = 0
         if cluster_idx < len(self.clusters) - 1:
-            high = self.clusters[cluster_idx+1, 0] - 1
+            high = self.clusters[cluster_idx + 1, 0] - 1
         else:
             high = len(self.data_order) - 1
-        return self.clusters[cluster_idx, 0] == low \
-               and self.clusters[cluster_idx, 1] == high
+        return (
+            self.clusters[cluster_idx, 0] == low
+            and self.clusters[cluster_idx, 1] == high
+        )

     def combine_clusters(self, combine):
         """Combine clusters specified by each subarray of cluster indices.
@@ -283,15 +289,35 @@ def combine_clusters(self, combine):
             # Test that all clusters are contiguous and adjacent
             first = c[0]
             for i in range(c[0], c[-1]):
-                if c[i+1-first]-1 != c[i-first]:
-                    raise ValueError(''.join(["Clusters ", str(c[i]), " and ", str(c[i+1]), " are not contiguous and/or increasing."]))
-                if self.clusters[i+1, 0]-self.clusters[i, 1] != 1:
-                    raise ValueError(''.join(["Clusters ", str(c[i]), " and ", str(c[i+1]), " have unclustered points between them."]))
-
-            #update cluster endpoints
+                if c[i + 1 - first] - 1 != c[i - first]:
+                    raise ValueError(
+                        "".join(
+                            [
+                                "Clusters ",
+                                str(c[i]),
+                                " and ",
+                                str(c[i + 1]),
+                                " are not contiguous and/or increasing.",
+                            ]
+                        )
+                    )
+                if self.clusters[i + 1, 0] - self.clusters[i, 1] != 1:
+                    raise ValueError(
+                        "".join(
+                            [
+                                "Clusters ",
+                                str(c[i]),
+                                " and ",
+                                str(c[i + 1]),
+                                " have unclustered points between them.",
+                            ]
+                        )
+                    )
+
+            # update cluster endpoints
             self.clusters[c[0], 1] = self.clusters[c[-1], 1]
         todelete = np.array([c[1:] for c in combine]).ravel()
-        self.clusters = np.delete(self.clusters, todelete ,0)
+        self.clusters = np.delete(self.clusters, todelete, 0)

     def find_adjacent_clusters(self):
         """Return all cluster indices with no unclustered points between them.
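    # NOTE (editor, not part of the patch): in the hunk below,
    # adj.append(range(left_idx, right_idx + 1)) stores range objects under
    # Python 3, so the final np.array(adj) no longer holds plain index lists.
    # A hedged, Python 3 safe sketch of that line:
    #
    #     adj.append(list(range(left_idx, right_idx + 1)))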
@@ -306,20 +332,26 @@ def find_adjacent_clusters(self): adj = [] left_idx = 0 - while left_idx < len(self.clusters)-1: - while left_idx < len(self.clusters)-1 and self.clusters[left_idx+1, 0] - self.clusters[left_idx, 1] !=1: + while left_idx < len(self.clusters) - 1: + while ( + left_idx < len(self.clusters) - 1 + and self.clusters[left_idx + 1, 0] - self.clusters[left_idx, 1] != 1 + ): left_idx += 1 # Not left_idx+1 since left_idx=len(self.clusters)-2 even if no # clusters are actually adjacent. right_idx = left_idx - while right_idx < len(self.clusters)-1 and self.clusters[right_idx+1, 0] - self.clusters[right_idx, 1] == 1: + while ( + right_idx < len(self.clusters) - 1 + and self.clusters[right_idx + 1, 0] - self.clusters[right_idx, 1] == 1 + ): right_idx += 1 if right_idx > left_idx: - adj.append(range(left_idx, right_idx+1)) - left_idx = right_idx+1 # set for next possible left_idx + adj.append(range(left_idx, right_idx + 1)) + left_idx = right_idx + 1 # set for next possible left_idx return np.array(adj) def cut(self, idx): @@ -331,24 +363,23 @@ def cut(self, idx): data_ids = self.clusters[idx] if len(data_ids) == data_ids.size: # idx is a scalar, so give single slice object - return slice(data_ids[0], data_ids[1]+1) + return slice(data_ids[0], data_ids[1] + 1) else: # idx is a list/slice, so give list of slice objects - return [slice(c[0], c[1]+1) for c in data_ids] + return [slice(c[0], c[1] + 1) for c in data_ids] def cluster_boundaries(self): """Return sequence with (x,y) of all cluster boundaries.""" boundaries = [] for l in self.clusters: - xlo = np.mean(self.x[l[0]-1:l[0]+1]) - ylo = np.mean(self.y[l[0]-1:l[0]+1]) - xhi = np.mean(self.x[l[1]:l[1]+2]) - yhi = np.mean(self.y[l[1]:l[1]+2]) + xlo = np.mean(self.x[l[0] - 1 : l[0] + 1]) + ylo = np.mean(self.y[l[0] - 1 : l[0] + 1]) + xhi = np.mean(self.x[l[1] : l[1] + 2]) + yhi = np.mean(self.y[l[1] : l[1] + 2]) boundaries.append((xlo, ylo)) boundaries.append((xhi, yhi)) return np.unique(boundaries) - def plot(self, *args, **kwds): """Plot the data with vertical lines at the cluster divisions. 
@@ -362,7 +393,7 @@ def plot(self, *args, **kwds): boundaries = self.cluster_boundaries() (ymin, ymax) = ax.get_ylim() for b in boundaries: - plt.axvline(b[0], 0, (b[1]-ymin)/(ymax-ymin), color='k') + plt.axvline(b[0], 0, (b[1] - ymin) / (ymax - ymin), color="k") plt.ion() ax.figure.canvas.draw() return @@ -376,18 +407,22 @@ def animate(self): status = self.status self.reset_clusters() + fig, ax = plt.subplots() + canvas = fig.canvas + background = canvas.copy_from_bbox(ax.bbox) + ymin, ymax = ax.get_ylim() all_lines = [] for i in self: canvas.restore_region(background) boundaries = self.cluster_boundaries() for i, b in enumerate(boundaries): - height = (b[1]-ymin)/(ymax-ymin) + height = (b[1] - ymin) / (ymax - ymin) if i < len(all_lines): all_lines[i].set_xdata([b[0], b[0]]) all_lines[i].set_ydata([0, height]) ax.draw_artist(all_lines[i]) else: - l = plt.axvline(b[0], 0, height, color='k', animated=True) + l = plt.axvline(b[0], 0, height, color="k", animated=True) ax.draw_artist(l) all_lines.append(l) canvas.blit(ax.bbox) @@ -399,21 +434,42 @@ def animate(self): self.status = status return -#End of class DataClusters - -# simple test code -if __name__ == '__main__': +# End of class DataClusters - x = np.array([-2., -1.5, -1., -0.5, 0., 0.5, 1., 1.5, 2., 2.5, 3., 3.5, 4., 4.5, 5.]) - y = np.array([0.0183156, 0.105399, 0.36788, 0.778806, 1.00012, 0.780731, 0.386195, 0.210798, 0.386195, 0.780731, 1.00012, 0.778806, 0.36788, 0.105399, 0.0183156]) - testcluster = DataClusters(x, y, .1) +# simple test code +if __name__ == "__main__": + + x = np.array( + [-2.0, -1.5, -1.0, -0.5, 0.0, 0.5, 1.0, 1.5, 2.0, 2.5, 3.0, 3.5, 4.0, 4.5, 5.0] + ) + y = np.array( + [ + 0.0183156, + 0.105399, + 0.36788, + 0.778806, + 1.00012, + 0.780731, + 0.386195, + 0.210798, + 0.386195, + 0.780731, + 1.00012, + 0.778806, + 0.36788, + 0.105399, + 0.0183156, + ] + ) + + testcluster = DataClusters(x, y, 0.1) testcluster.makeclusters() - print testcluster.clusters + print(testcluster.clusters) adj = testcluster.find_adjacent_clusters() - print adj - if len(adj) >0: + print(adj) + if len(adj) > 0: testcluster.combine_clusters(adj) - print testcluster.clusters + print(testcluster.clusters) diff --git a/diffpy/srmise/modelcluster.py b/diffpy/srmise/modelcluster.py index 10c0677..1b4bfe0 100644 --- a/diffpy/srmise/modelcluster.py +++ b/diffpy/srmise/modelcluster.py @@ -57,8 +57,10 @@ class ModelCovariance(object): def __init__(self, *args, **kwds): """Intialize object.""" - self.cov = None # The raw covariance matrix - self.model = None # ModelParts instance, so both peaks and baseline (if present) + self.cov = None # The raw covariance matrix + self.model = ( + None # ModelParts instance, so both peaks and baseline (if present) + ) # Map i->[n1,n2,...] of the jth ModelPart to the n_i parameters in cov. self.mmap = {} @@ -96,11 +98,14 @@ def setcovariance(self, model, cov): emsg = "Parameter 'cov' must be a square matrix." 
raise ValueError(emsg) - - if tempcov.shape[0] != model.npars(True) and tempcov.shape[0] != model.npars(False): - emsg = ["Parameter 'cov' must be an nxn matrix, where n is equal to the number of free ", - "parameters in the model, or the total number of parameters (fixed and free) of ", - "the model."] + if tempcov.shape[0] != model.npars(True) and tempcov.shape[0] != model.npars( + False + ): + emsg = [ + "Parameter 'cov' must be an nxn matrix, where n is equal to the number of free ", + "parameters in the model, or the total number of parameters (fixed and free) of ", + "the model.", + ] raise ValueError("".join(emsg)) self.model = model.copy() @@ -113,8 +118,8 @@ def setcovariance(self, model, cov): for i, m in enumerate(model): self.mmap[i] = n + np.arange(m.npars(True)) for j, p in enumerate(m): - self.pmap[(i,j)] = n - self.ipmap[n] = (i,j) + self.pmap[(i, j)] = n + self.ipmap[n] = (i, j) n += 1 if n == tempcov.shape[0]: @@ -122,21 +127,20 @@ def setcovariance(self, model, cov): self.cov = tempcov else: # Create new covariance matrix, making sure to account for fixed pars - self.cov = np.matrix(np.zeros((n,n))) + self.cov = np.matrix(np.zeros((n, n))) - i=0 - rawi=0 + i = 0 + rawi = 0 for i in range(n): j = 0 rawj = 0 if free[i]: for j in range(n): if free[j]: - self.cov[i,j] = cov[rawi,rawj] + self.cov[i, j] = cov[rawi, rawj] rawj += 1 rawi += 1 - def transform(self, in_format, out_format, **kwds): """Transform parameters and covariance matrix under specified change of variables. @@ -168,7 +172,7 @@ def transform(self, in_format, out_format, **kwds): if "parts" in kwds: if kwds["parts"] == "peaks": - parts = range(len(self.model)-1) + parts = range(len(self.model) - 1) elif kwds["parts"] == "baseline": parts = [-1] else: @@ -189,33 +193,41 @@ def transform(self, in_format, out_format, **kwds): for i in parts: start = self.mmap[i][0] - stop = self.mmap[i][-1]+1 + stop = self.mmap[i][-1] + 1 p = self.model[i] try: subg = p.owner().transform_derivatives(p.pars, in_format, out_format) except NotImplementedError: - logger.warning("Transformation gradient not implemented for part %i: %s. Ignoring transformation." %(i, str(p))) + logger.warning( + "Transformation gradient not implemented for part %i: %s. Ignoring transformation." + % (i, str(p)) + ) subg = np.identity(p.npars(True)) except Exception as e: - logger.warning("Transformation gradient failed for part %i: %s. Failed with message %s. Ignoring transformation." %(i, str(p), str(e))) + logger.warning( + "Transformation gradient failed for part %i: %s. Failed with message %s. Ignoring transformation." + % (i, str(p), str(e)) + ) subg = np.identity(p.npars(True)) # Now transform the parameters to match try: p.pars = p.owner().transform_parameters(p.pars, in_format, out_format) except Exception as e: - logger.warning("Parameter transformation failed for part %i: %s. Failed with message %s. Ignoring transformation." %(i, str(p), str(e))) + logger.warning( + "Parameter transformation failed for part %i: %s. Failed with message %s. Ignoring transformation." 
+ % (i, str(p), str(e)) + ) subg = np.identity(p.npars(True)) # Update the global gradient matrix g[start:stop, start:stop] = subg g = np.matrix(g) - self.cov = np.array(g*np.matrix(self.cov).transpose()*g) + self.cov = np.array(g * np.matrix(self.cov).transpose() * g) return - def getcorrelation(self, i, j): """Return the correlation between variables i and j, Corr_ij=Cov_ij/(sigma_i sigma_j) @@ -233,10 +245,12 @@ def getcorrelation(self, i, j): i1 = self.pmap[i] if i in self.pmap else i j1 = self.pmap[j] if j in self.pmap else j - if self.cov[i1,i1] == 0. or self.cov[j1,j1] == 0.: - return 0. # Avoiding undefined quantities is sensible in this context. + if self.cov[i1, i1] == 0.0 or self.cov[j1, j1] == 0.0: + return 0.0 # Avoiding undefined quantities is sensible in this context. else: - return self.cov[i1,j1]/(np.sqrt(self.cov[i1,i1])*np.sqrt(self.cov[j1,j1])) + return self.cov[i1, j1] / ( + np.sqrt(self.cov[i1, i1]) * np.sqrt(self.cov[j1, j1]) + ) def getvalue(self, i): """Return value of parameter i. @@ -254,7 +268,7 @@ def getuncertainty(self, i): which indicate the mth parameter of modelpart l. """ (l, m) = i if i in self.pmap else self.ipmap[i] - return np.sqrt(self.getcovariance(i,i)) + return np.sqrt(self.getcovariance(i, i)) def getcovariance(self, i, j): """Return the covariance between variables i and j. @@ -270,7 +284,7 @@ def getcovariance(self, i, j): i1 = self.pmap[i] if i in self.pmap else i j1 = self.pmap[j] if j in self.pmap else j - return self.cov[i1,j1] + return self.cov[i1, j1] def get(self, i): """Return (value, uncertainty) tuple for parameter i. @@ -298,9 +312,9 @@ def correlationwarning(self, threshold=0.8): correlated = [] for i in range(self.cov.shape[0]): - for j in range(i+1, self.cov.shape[0]): - c = self.getcorrelation(i,j) - if c and np.abs(c) > threshold: # filter out None values + for j in range(i + 1, self.cov.shape[0]): + c = self.getcorrelation(i, j) + if c and np.abs(c) > threshold: # filter out None values correlated.append((self.ipmap[i], self.ipmap[j], c)) return correlated @@ -310,7 +324,7 @@ def __str__(self): return "Model and/or Covariance matrix undefined." lines = [] for i, m in enumerate(self.model): - lines.append(" ".join([self.prettypar((i,j)) for j in range(len(m))])) + lines.append(" ".join([self.prettypar((i, j)) for j in range(len(m))])) return "\n".join(lines) def prettypar(self, i): @@ -322,11 +336,12 @@ def prettypar(self, i): if self.model is None or self.cov is None: return "Model and/or Covariance matrix undefined." k = i if i in self.ipmap else self.pmap[i] - return "%.5e (%.5e)" %(self.getvalue(k), np.sqrt(self.getcovariance(k,k))) + return "%.5e (%.5e)" % (self.getvalue(k), np.sqrt(self.getcovariance(k, k))) # End of class ModelCovariance + class ModelCluster(object): """Associate a contiguous cluster of data with an appropriate model. 
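# NOTE (editor, not part of the patch): ModelCovariance.getcorrelation() above
# implements Corr_ij = Cov_ij / (sigma_i * sigma_j).  A small worked example
# with an invented 2x2 covariance matrix:
#
#     import numpy as np
#     cov = np.array([[4.0, 1.0], [1.0, 9.0]])
#     corr = cov[0, 1] / (np.sqrt(cov[0, 0]) * np.sqrt(cov[1, 1]))
#     # corr = 1.0 / (2.0 * 3.0) ~ 0.167, a weak positive correlation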
@@ -399,7 +414,7 @@ def __init__(self, model, *args, **kwds): self.error_method = orig.error_method self.peak_funcs = list(orig.peak_funcs) return - else: # Explicit creation + else: # Explicit creation if model is None: self.model = Peaks([]) else: @@ -468,26 +483,28 @@ def writestr(self, **kwds): if self.peak_funcs is None: lines.append("peak_funcs=None") else: - lines.append("peak_funcs=%s" %repr([pfbaselist.index(p) for p in self.peak_funcs])) + lines.append( + "peak_funcs=%s" % repr([pfbaselist.index(p) for p in self.peak_funcs]) + ) if self.error_method is None: - lines.append('ModelEvaluator=None') + lines.append("ModelEvaluator=None") else: - lines.append('ModelEvaluator=%s' %self.error_method.__name__) + lines.append("ModelEvaluator=%s" % self.error_method.__name__) - lines.append("slice=%s" %repr(self.slice)) + lines.append("slice=%s" % repr(self.slice)) # Indexed baseline functions (unless externally provided) if writeblf: lines.append("## BaselineFunctions") for i, bf in enumerate(blfbaselist): - lines.append('# BaselineFunction %s' %i) + lines.append("# BaselineFunction %s" % i) lines.append(bf.writestr(blfbaselist)) # Indexed peak functions (unless externally provided) if writepf: lines.append("## PeakFunctions") for i, pf in enumerate(pfbaselist): - lines.append('# PeakFunction %s' %i) + lines.append("# PeakFunction %s" % i) lines.append(pf.writestr(pfbaselist)) lines.append("# BaselineObject") @@ -501,17 +518,16 @@ def writestr(self, **kwds): lines.append("None") else: for m in self.model: - lines.append('# ModelPeak') + lines.append("# ModelPeak") lines.append(m.writestr(pfbaselist)) # Raw data in modelcluster. - lines.append('### start data') - lines.append('#L r y dy') + lines.append("### start data") + lines.append("#L r y dy") for i in range(len(self.r_data)): - lines.append('%g %g %g' % \ - (self.r_data[i], self.y_data[i], self.y_error[i]) ) + lines.append("%g %g %g" % (self.r_data[i], self.y_data[i], self.y_error[i])) - datastring = "\n".join(lines)+"\n" + datastring = "\n".join(lines) + "\n" return datastring @staticmethod @@ -545,101 +561,98 @@ def factory(mcstr, **kwds): # - StartData # find data section, and what information it contains - res = re.search(r'^#+ start data\s*(?:#.*\s+)*', mcstr, re.M) + res = re.search(r"^#+ start data\s*(?:#.*\s+)*", mcstr, re.M) if res: - start_data = mcstr[res.end():].strip() - start_data_info = mcstr[res.start():res.end()] - header = mcstr[:res.start()] - res = re.search(r'^(#+L.*)$', start_data_info, re.M) + start_data = mcstr[res.end() :].strip() + start_data_info = mcstr[res.start() : res.end()] + header = mcstr[: res.start()] + res = re.search(r"^(#+L.*)$", start_data_info, re.M) if res: - start_data_info = start_data_info[res.start():res.end()].strip() + start_data_info = start_data_info[res.start() : res.end()].strip() hasr = False hasy = False hasdy = False - res = re.search(r'\br\b', start_data_info) + res = re.search(r"\br\b", start_data_info) if res: hasr = True - res = re.search(r'\by\b', start_data_info) + res = re.search(r"\by\b", start_data_info) if res: hasy = True - res = re.search(r'\bdy\b', start_data_info) + res = re.search(r"\bdy\b", start_data_info) if res: hasdy = True # Model - res = re.search(r'^#+ ModelPeaks.*$', header, re.M) + res = re.search(r"^#+ ModelPeaks.*$", header, re.M) if res: - model_peaks = header[res.end():].strip() - header = header[:res.start()] + model_peaks = header[res.end() :].strip() + header = header[: res.start()] # Baseline Object - res = re.search(r'^#+ 
BaselineObject\s*(?:#.*\s+)*', header, re.M)
+        res = re.search(r"^#+ BaselineObject\s*(?:#.*\s+)*", header, re.M)
         if res:
-            baselineobject = header[res.end():].strip()
-            header = header[:res.start()]
+            baselineobject = header[res.end() :].strip()
+            header = header[: res.start()]

         # Peak functions
         if readpf:
-            res = re.search(r'^#+ PeakFunctions.*$', header, re.M)
+            res = re.search(r"^#+ PeakFunctions.*$", header, re.M)
             if res:
-                peakfunctions = header[res.end():].strip()
-                header = header[:res.start()]
+                peakfunctions = header[res.end() :].strip()
+                header = header[: res.start()]

         # Baseline functions
         if readblf:
-            res = re.search(r'^#+ BaselineFunctions.*$', header, re.M)
+            res = re.search(r"^#+ BaselineFunctions.*$", header, re.M)
             if res:
-                baselinefunctions = header[res.end():].strip()
-                header = header[:res.start()]
+                baselinefunctions = header[res.end() :].strip()
+                header = header[: res.start()]

         ### Instantiating baseline functions
         if readblf:
             blfbaselist = []
-            res = re.split(r'(?m)^#+ BaselineFunction \d+\s*(?:#.*\s+)*', baselinefunctions)
+            res = re.split(
+                r"(?m)^#+ BaselineFunction \d+\s*(?:#.*\s+)*", baselinefunctions
+            )
             for s in res[1:]:
                 blfbaselist.append(BaseFunction.factory(s, blfbaselist))

         ### Instantiating peak functions
         if readpf:
             pfbaselist = []
-            res = re.split(r'(?m)^#+ PeakFunction \d+\s*(?:#.*\s+)*', peakfunctions)
+            res = re.split(r"(?m)^#+ PeakFunction \d+\s*(?:#.*\s+)*", peakfunctions)
             for s in res[1:]:
                 pfbaselist.append(BaseFunction.factory(s, pfbaselist))

-
         ### Instantiating header data
         # peak_funcs
-        res = re.search(r'^peak_funcs=(.*)$', header, re.M)
+        res = re.search(r"^peak_funcs=(.*)$", header, re.M)
         peak_funcs = eval(res.groups()[0].strip())
         if peak_funcs is not None:
             peak_funcs = [pfbaselist[i] for i in peak_funcs]

         # error_method
-        res = re.search(r'^ModelEvaluator=(.*)$', header, re.M)
+        res = re.search(r"^ModelEvaluator=(.*)$", header, re.M)
         __import__("diffpy.srmise.modelevaluators")
         module = sys.modules["diffpy.srmise.modelevaluators"]
         error_method = getattr(module, res.groups()[0].strip())

         # slice
-        res = re.search(r'^slice=(.*)$', header, re.M)
+        res = re.search(r"^slice=(.*)$", header, re.M)
         cluster_slice = eval(res.groups()[0].strip())

-
         ### Instantiating BaselineObject
-        if re.match(r'^None$', baselineobject):
+        if re.match(r"^None$", baselineobject):
             baseline = None
         else:
             baseline = Baseline.factory(baselineobject, blfbaselist)

-
         ### Instantiating model
         model = Peaks()
-        res = re.split(r'(?m)^#+ ModelPeak\s*(?:#.*\s+)*', model_peaks)
+        res = re.split(r"(?m)^#+ ModelPeak\s*(?:#.*\s+)*", model_peaks)
         for s in res[1:]:
             model.append(Peak.factory(s, pfbaselist))

-
         ### Instantiating start data
         # read actual data - r, y, dy
         arrays = []
@@ -663,10 +676,14 @@
             for line in start_data.split("\n"):
                 l = line.split()
                 if len(arrays) != len(l):
-                    emsg = ("Number of value fields does not match that given by '%s'" %start_data_info)
+                    emsg = (
+                        "Number of value fields does not match that given by '%s'"
+                        % start_data_info
+                    )
+                    raise SrMiseDataFormatError(emsg)
                 for a, v in zip(arrays, line.split()):
                     a.append(float(v))
-        except (ValueError, IndexError), err:
+        except (ValueError, IndexError) as err:
             raise SrMiseDataFormatError(err)
         if hasr:
             r_data = np.array(r_data)
@@ -675,8 +691,16 @@
         if hasdy:
             y_error = np.array(y_error)

-        return ModelCluster(model, baseline, r_data, y_data, y_error, cluster_slice, error_method, peak_funcs)
-
+        return ModelCluster(
+            model,
+            baseline,
+            r_data,
+            y_data,
+            y_error,
+            cluster_slice,
+            error_method,
+
peak_funcs, + ) @staticmethod def join_adjacent(m1, m2): @@ -724,11 +748,11 @@ def join_adjacent(m1, m2): if not right_ids[0] == left_ids[1]: raise ValueError("Given ModelClusters are not adjacent.") - new_slice=slice(left_ids[0], right_ids[1], 1) + new_slice = slice(left_ids[0], right_ids[1], 1) # Approximately where the clusters meet. - border_x = .5*(left.r_data[left_ids[1]-1] + right.r_data[right_ids[0]]) - border_y = .5*(left.y_data[left_ids[1]-1] + right.y_data[right_ids[0]]) + border_x = 0.5 * (left.r_data[left_ids[1] - 1] + right.r_data[right_ids[0]]) + border_y = 0.5 * (left.y_data[left_ids[1] - 1] + right.y_data[right_ids[0]]) if len(m1.model) > 0 and len(m2.model) > 0: new_model = left.model.copy() @@ -741,24 +765,40 @@ def join_adjacent(m1, m2): # border_x are removed. The highly unlikely case of two peaks # exactly at the border is also handled. for i in reversed(range(len(new_model))): - if new_model[i]["position"] == border_x and \ - i > 0 and new_model[i-1]["position"] == border_x: + if ( + new_model[i]["position"] == border_x + and i > 0 + and new_model[i - 1]["position"] == border_x + ): del new_model[i] elif new_ids[i] != i: - if (new_model[i]["position"] > border_x and new_ids[i] < len(left.model)) and \ - (new_model[i]["position"] < border_x and new_ids[i] >= len(left.model)): + if ( + new_model[i]["position"] > border_x + and new_ids[i] < len(left.model) + ) and ( + new_model[i]["position"] < border_x + and new_ids[i] >= len(left.model) + ): del new_model[i] # Likely to improve any future fitting new_model.match_at(border_x, border_y) elif len(m1.model) > 0: new_model = m1.model.copy() - else: # Only m2 has entries, or both are empty + else: # Only m2 has entries, or both are empty new_model = m2.model.copy() - peak_funcs = list(set(m1.peak_funcs) | set(m2.peak_funcs)) # "Union" - return ModelCluster(new_model, m1.baseline, m1.r_data, m1.y_data, - m1.y_error, new_slice, m1.error_method, peak_funcs) + peak_funcs = list(set(m1.peak_funcs) | set(m2.peak_funcs)) # "Union" + return ModelCluster( + new_model, + m1.baseline, + m1.r_data, + m1.y_data, + m1.y_error, + new_slice, + m1.error_method, + peak_funcs, + ) def change_slice(self, new_slice): """Change the slice which represents the extent of a cluster.""" @@ -786,11 +826,15 @@ def change_slice(self, new_slice): # check if slice has expanded on the left if self.never_fit and self.slice.start < old_slice.start: left_slice = slice(self.slice.start, old_slice.start) - self.never_fit = max(y_data_nobl[left_slice] - self.y_error[left_slice]) < 0 + self.never_fit = ( + max(y_data_nobl[left_slice] - self.y_error[left_slice]) < 0 + ) # check if slice has expanded on the right if self.never_fit and self.slice.stop > old_slice.stop: right_slice = slice(old_slice.stop, self.slice.stop) - self.never_fit = max(y_data_nobl[right_slice] - self.y_error[right_slice]) < 0 + self.never_fit = ( + max(y_data_nobl[right_slice] - self.y_error[right_slice]) < 0 + ) return @@ -808,7 +852,7 @@ def npars(self, count_baseline=True, count_fixed=True): n += self.baseline.npars(count_fixed=count_fixed) return n - def replacepeaks(self, newpeaks, delslice=slice(0,0)): + def replacepeaks(self, newpeaks, delslice=slice(0, 0)): """Replace peaks given by delslice by those in newpeaks. 
Parameters @@ -824,7 +868,7 @@ def replacepeaks(self, newpeaks, delslice=slice(0,0)): def deletepeak(self, idx): """Delete the peak at the given index.""" - self.replacepeaks([], slice(idx,idx+1)) + self.replacepeaks([], slice(idx, idx + 1)) def estimatepeak(self): """Attempt to add single peak to empty cluster. Return True if successful.""" @@ -840,18 +884,27 @@ def estimatepeak(self): # throw some exception pass selected = self.peak_funcs[0] - estimate = selected.estimate_parameters(self.r_cluster, self.y_cluster - self.valuebl()) - + estimate = selected.estimate_parameters( + self.r_cluster, self.y_cluster - self.valuebl() + ) if estimate is not None: newpeak = selected.actualize(estimate, "internal") - logger.info("Estimate: %s" %newpeak) + logger.info("Estimate: %s" % newpeak) self.replacepeaks(Peaks([newpeak])) return True else: return False - def fit(self, justify=False, ntrials=0, fitbaseline=False, estimate=True, cov=None, cov_format="default_output"): + def fit( + self, + justify=False, + ntrials=0, + fitbaseline=False, + estimate=True, + cov=None, + cov_format="default_output", + ): """Perform a chi-square fit of the model to data in cluster. Parameters @@ -871,11 +924,11 @@ def fit(self, justify=False, ntrials=0, fitbaseline=False, estimate=True, cov=No if self.never_fit: return None if len(self.model) == 0: - #Attempt to add a first peak to the cluster + # Attempt to add a first peak to the cluster if estimate: try: self.estimatepeak() - except SrMiseEstimationError, e: + except SrMiseEstimationError as e: logger.info("Fit: No model to fit, estimation not possible.") return else: @@ -890,7 +943,11 @@ def fit(self, justify=False, ntrials=0, fitbaseline=False, estimate=True, cov=No orig_baseline = self.baseline.copy() self.last_fit_size = self.size - if fitbaseline and self.baseline is not None and self.baseline.npars(count_fixed=False) > 0: + if ( + fitbaseline + and self.baseline is not None + and self.baseline.npars(count_fixed=False) > 0 + ): y_datafit = self.y_data fmodel = ModelParts(self.model) fmodel.append(self.baseline) @@ -899,20 +956,28 @@ def fit(self, justify=False, ntrials=0, fitbaseline=False, estimate=True, cov=No fmodel = self.model try: - fmodel.fit(self.r_data, - y_datafit, - self.y_error, - self.slice, - ntrials, - cov, - cov_format) - except SrMiseFitError, e: - logger.debug("Error while fitting cluster: %s\nReverting to original model.", e) + fmodel.fit( + self.r_data, + y_datafit, + self.y_error, + self.slice, + ntrials, + cov, + cov_format, + ) + except SrMiseFitError as e: + logger.debug( + "Error while fitting cluster: %s\nReverting to original model.", e + ) self.model = orig_model self.baseline = orig_baseline return None - if fitbaseline and self.baseline is not None and self.baseline.npars(count_fixed=False) > 0: + if ( + fitbaseline + and self.baseline is not None + and self.baseline.npars(count_fixed=False) > 0 + ): self.model = Peaks(fmodel[:-1]) self.baseline = fmodel[-1] else: @@ -925,17 +990,15 @@ def fit(self, justify=False, ntrials=0, fitbaseline=False, estimate=True, cov=No # Test for fit improvement if new_qual < orig_qual: # either fit blew up (and leastsq didn't notice) or the fit had already converged. 
-            msg = ["ModelCluster.fit() warning: fit seems not to have improved.",
-                   "Reverting to original model.",
-                   "----------",
-                   "New Quality: %s",
-                   "Original Quality: %s"
-                   "%s",
-                   "----------"]
-            logger.debug("\n".join(msg),
-                         new_qual.stat,
-                         orig_qual.stat,
-                         self.model)
+            msg = [
+                "ModelCluster.fit() warning: fit seems not to have improved.",
+                "Reverting to original model.",
+                "----------",
+                "New Quality: %s",
+                "Original Quality: %s",
+                "%s",
+                "----------",
+            ]
+            logger.debug("\n".join(msg), new_qual.stat, orig_qual.stat, self.model)

             self.model = orig_model
             self.baseline = orig_baseline
@@ -953,9 +1016,11 @@
         # original fit is less likely to obscure any hidden peaks.
         if justify and len(self.model) == 1 and len(orig_model) > 0:
             min_npars = min([p.npars for p in self.peak_funcs])
-            if new_qual.growth_justified(self, min_npars):
-                msg = ["ModelCluster.fit(): Fit over current cluster better explained by additional peaks.",
-                       "Reverting to original model."]
+            if new_qual.growth_justified(self, min_npars):
+                msg = [
+                    "ModelCluster.fit(): Fit over current cluster better explained by additional peaks.",
+                    "Reverting to original model.",
+                ]
                 logger.debug("\n".join(msg))

                 self.model = orig_model
@@ -974,28 +1039,35 @@ def contingent_fit(self, minpoints, growth_threshold):
         """
         if self.never_fit:
             return None
-        if (self.last_fit_size > 0 and float(self.size)/self.last_fit_size >= growth_threshold) \
-           or (self.last_fit_size == 0 and self.size >= minpoints):
+        if (
+            self.last_fit_size > 0
+            and float(self.size) / self.last_fit_size >= growth_threshold
+        ) or (self.last_fit_size == 0 and self.size >= minpoints):
             return self.fit(justify=True)
         return None

     def cleanfit(self):
         """Remove poor-quality peaks in the fit. Return number removed."""
-        #Find peaks located outside the cluster
+        # Find peaks located outside the cluster
         pos = np.array([p["position"] for p in self.model])
         left_idx = pos.searchsorted(self.r_cluster[0])
         right_idx = pos.searchsorted(self.r_cluster[-1])
-        outside_idx = range(0, left_idx)
+        outside_idx = list(range(0, left_idx))
         outside_idx.extend(range(right_idx, len(self.model)))
-        #inside_idx = range(left_idx, right_idx)
+        # inside_idx = range(left_idx, right_idx)

         # Identify outside peaks that contribute < error everywhere in cluster.
         # Must check entire cluster and not just nearest endpoint because not
         # every peak function can be assumed to have its greatest contribution
         # there, and errors are not necessarily constant.
-        outside_idx = [i for i in outside_idx \
-                       if (self.model[i].removable \
-                       and max(self.model[i].value(self.r_cluster) - self.error_cluster) < 0)]
+        outside_idx = [
+            i
+            for i in outside_idx
+            if (
+                self.model[i].removable
+                and max(self.model[i].value(self.r_cluster) - self.error_cluster) < 0
+            )
+        ]

         # TODO: Check for peaks that have blown up.
         # Remember to check if a peak is removable.

         # NaN is too serious not to remove, even if removable is False, but I should look
         # into better handling anyway.
- nan_idx = [i for i in range(len(self.model)) if np.isnan(self.model[i].pars).any()] + nan_idx = [ + i for i in range(len(self.model)) if np.isnan(self.model[i].pars).any() + ] if len(outside_idx) > 0: - msg = ["Following peaks outside cluster made no contribution within it and were removed:"] + msg = [ + "Following peaks outside cluster made no contribution within it and were removed:" + ] msg.extend([str(self.model[i]) for i in outside_idx]) logger.debug("\n".join(msg)) @@ -1015,11 +1091,11 @@ def cleanfit(self): msg.extend([str(self.model[i]) for i in nan_idx]) logger.debug("\n".join(msg)) -# # TODO: Uncomment when there's a point! -# if len(blown_idx) > 0: -# msg = ["Following peaks inside cluster were too large and had to be removed:"] -# msg.extend([str(self.model[i]) for i in blown_idx]) -# logger.info("\n".join(msg)) + # # TODO: Uncomment when there's a point! + # if len(blown_idx) > 0: + # msg = ["Following peaks inside cluster were too large and had to be removed:"] + # msg.extend([str(self.model[i]) for i in blown_idx]) + # logger.info("\n".join(msg)) # A peak can only be removed once. to_remove = list(set(outside_idx) | set(blown_idx) | set(nan_idx)) @@ -1048,7 +1124,7 @@ def reduce_to(self, x, y): logger.debug("reduce_to: No reduction necessary.") return None orig_model = self.model.copy() - self.model.match_at(x, y-self.valuebl(x)) + self.model.match_at(x, y - self.valuebl(x)) quality = self.fit() # Did reduction help? @@ -1067,13 +1143,15 @@ def reduce_to(self, x, y): def value(self, r=None): """Return value of baseline+model over cluster.""" - if len(self.model)==0: + if len(self.model) == 0: return self.valuebl(r) else: if r is None: - return self.valuebl(r)+(self.model.value(self.r_data, self.slice)[self.slice]) + return self.valuebl(r) + ( + self.model.value(self.r_data, self.slice)[self.slice] + ) else: - return self.valuebl(r)+(self.model.value(r)) + return self.valuebl(r) + (self.model.value(r)) def valuebl(self, r=None): """Return baseline's value over cluster. @@ -1088,10 +1166,10 @@ def valuebl(self, r=None): if r is None: return np.zeros(self.size) else: - return r*0. 
+ return r * 0.0 else: if r is None: - return (self.baseline.value(self.r_data, self.slice)[self.slice]) + return self.baseline.value(self.r_data, self.slice)[self.slice] else: return self.baseline.value(r) @@ -1130,9 +1208,9 @@ def plottable(self, joined=False): else: toreturn = [self.r_cluster, self.y_cluster] bl = self.valuebl() - toreturn.extend([self.r_cluster for i in range(2*len(self.model))]) + toreturn.extend([self.r_cluster for i in range(2 * len(self.model))]) for i, p in enumerate(self.model): - toreturn[2*i+3] = bl + p.value(self.r_data, self.slice)[self.slice] + toreturn[2 * i + 3] = bl + p.value(self.r_data, self.slice)[self.slice] return toreturn def plottable_residual(self): @@ -1149,16 +1227,15 @@ def augment(self, source): best_qual = self.quality() source_model = source.model.copy() - msg = ["==== Augmenting model ====", - "Original fit:", - "%s", - "w/ quality: %s", - "New model fits:", - "%s"] - logger.debug("\n".join(msg), - best_model, - best_qual.stat, - source_model) + msg = [ + "==== Augmenting model ====", + "Original fit:", + "%s", + "w/ quality: %s", + "New model fits:", + "%s", + ] + logger.debug("\n".join(msg), best_model, best_qual.stat, source_model) # Each iteration improves best_model by adding the peak from # source_model to best_model that most improves its quality, breaking @@ -1181,28 +1258,27 @@ def augment(self, source): best_model = test_models[args[-1]] del source_model[args[-1]] else: - break # Best possible model has been found. + break # Best possible model has been found. self.replacepeaks(best_model, slice(len(self.model))) # TODO: Do I need this? If test_model contains peaks # by reference, the fit peaks will change as well. self.fit() - msg = ["Best model after fit is:", - "%s", - "w/ quality: %s", - "================="] - logger.debug("\n".join(msg), - self.model, - best_qual.stat) + msg = ["Best model after fit is:", "%s", "w/ quality: %s", "================="] + logger.debug("\n".join(msg), self.model, best_qual.stat) return def __str__(self): """Return string representation of the cluster.""" - return '\n'.join(["Slice: %s" %self.slice, - "Quality: %s" %self.quality().stat, - "Baseline: %s" %self.baseline, - "Peaks:\n%s" %self.model]) + return "\n".join( + [ + "Slice: %s" % self.slice, + "Quality: %s" % self.quality().stat, + "Baseline: %s" % self.baseline, + "Peaks:\n%s" % self.model, + ] + ) def prune(self): """Remove peaks until model quality no longer improves. 
@@ -1223,10 +1299,19 @@ def prune(self): tracer.pushc() y_nobl = self.y_cluster - self.valuebl() - prune_mc = ModelCluster(None, None, self.r_cluster, y_nobl, self.error_cluster, None, self.error_method, self.peak_funcs) + prune_mc = ModelCluster( + None, + None, + self.r_cluster, + y_nobl, + self.error_cluster, + None, + self.error_method, + self.peak_funcs, + ) orig_model = self.model.copy() - peak_range = 3 # number of peaks on either side of deleted peak to fit + peak_range = 3 # number of peaks on either side of deleted peak to fit check_models = [] for m in orig_model: if m.removable: @@ -1237,16 +1322,11 @@ def prune(self): best_model = self.model.copy() best_qual = self.quality() - msg = ["====Pruning fits:====", - "Original model:", - "%s", - "w/ quality: %s"] - logger.info("\n".join(msg), - best_model, - best_qual.stat) + msg = ["====Pruning fits:====", "Original model:", "%s", "w/ quality: %s"] + logger.info("\n".join(msg), best_model, best_qual.stat) #### Main prune loop #### - while(check_models.count(None) < len(check_models)): + while check_models.count(None) < len(check_models): # Cache value of individual peaks for best current model. best_modely = [] @@ -1271,57 +1351,50 @@ def prune(self): for i in range(len(check_models)): if check_models[i] is not None: # Create model with ith peak removed, and distant peaks effectively fixed - lo = max(i-peak_range, 0) - hi = min(i+peak_range+1, len(best_model)) + lo = max(i - peak_range, 0) + hi = min(i + peak_range + 1, len(best_model)) check_models[i] = best_model[lo:i].copy() - check_models[i].extend(best_model[i+1:hi].copy()) + check_models[i].extend(best_model[i + 1 : hi].copy()) prune_mc.model = check_models[i] - msg = ["len(check_models): %s", - "len(best_model): %s", - "i: %s"] - logger.debug("\n".join(msg), - len(check_models), - len(best_model), - i) - - addpars = best_model.npars() - check_models[i].npars() - best_model[i].npars(count_fixed=False) + msg = ["len(check_models): %s", "len(best_model): %s", "i: %s"] + logger.debug("\n".join(msg), len(check_models), len(best_model), i) + addpars = ( + best_model.npars() + - check_models[i].npars() + - best_model[i].npars(count_fixed=False) + ) # Remove contribution of (effectively) fixed peaks y = np.array(y_nobl) if lo > 0: - logger.debug("len(sum): %s", len(np.sum(best_modely[:lo], axis=0))) + logger.debug( + "len(sum): %s", len(np.sum(best_modely[:lo], axis=0)) + ) y -= np.sum(best_modely[:lo], axis=0) if hi < len(best_modely): y -= np.sum(best_modely[hi:], axis=0) prune_mc.y_data = y prune_mc.y_cluster = y - msg = ["", - "--- %s ---", - "Removed peak: %s", - "Starting model:", - "%s"] - logger.debug("\n".join(msg), - i, - best_model[i], - prune_mc.model) + msg = [ + "", + "--- %s ---", + "Removed peak: %s", + "Starting model:", + "%s", + ] + logger.debug("\n".join(msg), i, best_model[i], prune_mc.model) - prune_mc.fit(ntrials=int(np.sqrt(len(y))+50), estimate=False) + prune_mc.fit(ntrials=int(np.sqrt(len(y)) + 50), estimate=False) qual = prune_mc.quality(kshift=addpars) check_qual = np.append(check_qual, qual) check_qualidx = np.append(check_qualidx, i) - msg = ["Found model:", - "%s", - "addpars: %s", - "qual: %s"] - logger.debug("\n".join(msg), - prune_mc.model, - addpars, - qual.stat) + msg = ["Found model:", "%s", "addpars: %s", "qual: %s"] + logger.debug("\n".join(msg), prune_mc.model, addpars, qual.stat) # Do not check this peak in the future if quality decreased. 
if qual < best_qual: @@ -1329,21 +1402,22 @@ def prune(self): arg = check_qual.argsort() - msg = [" - Finished round of pruning -", - "best_qual: %s", - "check_qual: %s", - "sorted check_qual: %s"] - logger.debug("\n".join(msg), - best_qual.stat, - [c.stat for c in check_qual], - arg) + msg = [ + " - Finished round of pruning -", + "best_qual: %s", + "check_qual: %s", + "sorted check_qual: %s", + ] + logger.debug( + "\n".join(msg), best_qual.stat, [c.stat for c in check_qual], arg + ) arg = arg[-1] newbest_qual = check_qual[arg] newbest_qualidx = check_qualidx[arg] if newbest_qual > best_qual: - lo = max(newbest_qualidx-peak_range, 0) - hi = min(newbest_qualidx+peak_range+1, len(orig_model)) + lo = max(newbest_qualidx - peak_range, 0) + hi = min(newbest_qualidx + peak_range + 1, len(orig_model)) bmtemp = best_model[:lo] bmtemp.extend(check_models[newbest_qualidx]) bmtemp.extend(best_model[hi:]) @@ -1360,12 +1434,8 @@ def prune(self): self.model = best_model tracer.emit(self) - msg = ["New best model:", - "%s", - "best_qual: %s"] - logger.debug("\n".join(msg), - best_model, - best_qual.stat) + msg = ["New best model:", "%s", "best_qual: %s"] + logger.debug("\n".join(msg), best_model, best_qual.stat) if len(best_model) > 0: del check_models[newbest_qualidx] @@ -1378,48 +1448,49 @@ def prune(self): else: break - msg = ["Best model after pruning is:", - "%s", - "w/ quality: %s", - "================="] - logger.info("\n".join(msg), - self.model, - self.quality().stat) + msg = [ + "Best model after pruning is:", + "%s", + "w/ quality: %s", + "=================", + ] + logger.info("\n".join(msg), self.model, self.quality().stat) tracer.popc() return + # simple test code -if __name__ == '__main__': +if __name__ == "__main__": from numpy.random import randn from diffpy.srmise.modelevaluators import AICc from diffpy.srmise.peaks import GaussianOverR - pf = GaussianOverR(.7) - res = .01 + pf = GaussianOverR(0.7) + res = 0.01 - pars = [[3, .2, 10], [3.5, .2, 10]] + pars = [[3, 0.2, 10], [3.5, 0.2, 10]] ideal_peaks = Peaks([pf.actualize(p, "pwa") for p in pars]) - r = np.arange(2,4,res) + r = np.arange(2, 4, res) y = ideal_peaks.value(r) + randn(len(r)) err = np.ones(len(r)) evaluator = AICc() - guesspars = [[2.9, .15, 5], [3.6, .3, 5]] + guesspars = [[2.9, 0.15, 5], [3.6, 0.3, 5]] guess_peaks = Peaks([pf.actualize(p, "pwa") for p in guesspars]) cluster = ModelCluster(guess_peaks, None, r, y, err, None, AICc, [pf]) - print "--- Actual Peak parameters ---" - print ideal_peaks + print("--- Actual Peak parameters ---") + print(ideal_peaks) - print "\n--- Before fit ---" - print cluster + print("\n--- Before fit ---") + print(cluster) cluster.fit() - print "\n--- After fit ---" - print cluster + print("\n--- After fit ---") + print(cluster) diff --git a/diffpy/srmise/modelparts.py b/diffpy/srmise/modelparts.py index 1cb6412..4943e0a 100644 --- a/diffpy/srmise/modelparts.py +++ b/diffpy/srmise/modelparts.py @@ -34,8 +34,8 @@ # Before it returned a scalar, later it returned an array of length 1. 
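# NOTE (editor, not part of the patch): the pkg_resources check below works,
# but pkg_resources is deprecated in recent setuptools releases.  A hedged
# alternative sketch using the third-party `packaging` library (an extra
# dependency, not currently imported by this module):
#
#     from packaging.version import Version
#     import scipy
#     __oldleastsqbehavior__ = Version(scipy.__version__) < Version("0.8.0")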
import pkg_resources as pr

-__spv__ = pr.get_distribution('scipy').version
-__oldleastsqbehavior__ = (pr.parse_version(__spv__) < pr.parse_version('0.8.0'))
+__spv__ = pr.get_distribution("scipy").version
+__oldleastsqbehavior__ = pr.parse_version(__spv__) < pr.parse_version("0.8.0")


 class ModelParts(list):
@@ -57,7 +57,16 @@ class ModelParts(list):
     def __init__(self, *args, **kwds):
         list.__init__(self, *args, **kwds)

-    def fit(self, r, y, y_error, range=None, ntrials=0, cov=None, cov_format="default_output"):
+    def fit(
+        self,
+        r,
+        y,
+        y_error,
+        range=None,
+        ntrials=0,
+        cov=None,
+        cov_format="default_output",
+    ):
         """Chi-square fit of all free parameters to given data.

-        There must be at least as many free parameters as data points.
+        There must be more data points than free parameters.
@@ -75,12 +84,17 @@
         """
         freepars = self.unpack_freepars()
         if len(freepars) >= len(r):
-            emsg = "Cannot fit model with " + str(len(freepars)) +\
-                   " free parametersbut only "+str(len(r)) + " data points."
+            emsg = (
+                "Cannot fit model with "
+                + str(len(freepars))
+                + " free parameters but only "
+                + str(len(r))
+                + " data points."
+            )
             raise SrMiseFitError(emsg)
         if len(freepars) == 0:
-            #emsg = "Cannot fit model with no free parameters."
-            #raise SrMiseFitError(emsg)
+            # emsg = "Cannot fit model with no free parameters."
+            # raise SrMiseFitError(emsg)
             return

         if range == None:
@@ -95,38 +109,48 @@
             plt.cla()
             plt.title("Before")
             plt.plot(r, y, label="_nolabel_")
-            plt.plot(r, (y-self.value(r, range=range))-1.1*(max(y) - min(y)), label="_nolabel_")
+            plt.plot(
+                r,
+                (y - self.value(r, range=range)) - 1.1 * (max(y) - min(y)),
+                label="_nolabel_",
+            )
             for p in self:
                 plt.plot(r, p.value(r, range=range), label=str(p))
             plt.ion()

         try:
             f = leastsq(
-                self.residual, # minimize this function
-                freepars, # initial parameters
-                args=args, # arguments to residual, residual_jacobian
-                Dfun=self.residual_jacobian, # explicit Jacobian
-                col_deriv=1, # order of derivatives in Jacobian
+                self.residual,  # minimize this function
+                freepars,  # initial parameters
+                args=args,  # arguments to residual, residual_jacobian
+                Dfun=self.residual_jacobian,  # explicit Jacobian
+                col_deriv=1,  # order of derivatives in Jacobian
                 full_output=1,
-                maxfev=ntrials)
+                maxfev=ntrials,
+            )
         except NotImplementedError:
             # TODO: Figure out if is worth checking for residual_jacobian
             # before leastsq(). This exception will either occur almost never
             # or extremely frequently, and the extra evaluations will add up.
             logger.info("One or more functions do not define residual_jacobian().")
             f = leastsq(
-                self.residual, # minimize this function
-                freepars, # initial parameters
-                args=args, # arguments to residual
-                col_deriv=1, # order of derivatives in Jacobian
+                self.residual,  # minimize this function
+                freepars,  # initial parameters
+                args=args,  # arguments to residual
+                col_deriv=1,  # order of derivatives in Jacobian
                 full_output=1,
-                maxfev=ntrials)
+                maxfev=ntrials,
+            )
         except Exception:
             # Sadly, KeyboardInterrupt, etc. is reraised as minpack.error
             # Not much I can do about that, though.
             import traceback
-            emsg = "Unexpected error in modelparts.fit(). Original exception:\n" +\
-                   traceback.format_exc() + "End original exception."
+
+            emsg = (
+                "Unexpected error in modelparts.fit(). Original exception:\n"
+                + traceback.format_exc()
+                + "End original exception."
+ ) raise SrMiseFitError(emsg) result = f[0] @@ -144,22 +168,34 @@ def fit(self, r, y, y_error, range=None, ntrials=0, cov=None, cov_format="defaul plt.cla() plt.title("After") plt.ion() - plt.plot(r, y, - r, (y-self.value(r, range=range))-1.1*(max(y) - min(y)), - *[i for sublist in [[r, p.value(r, range=range)] for p in self] for i in sublist]) + plt.plot( + r, + y, + r, + (y - self.value(r, range=range)) - 1.1 * (max(y) - min(y)), + *[ + i + for sublist in [[r, p.value(r, range=range)] for p in self] + for i in sublist + ] + ) plt.draw() if srmiselog.wait: - print "Press 'Enter' to continue...", - raw_input() + print( + "Press 'Enter' to continue...", + ) + input() - if f[4] not in (1,2,3,4): + if f[4] not in (1, 2, 3, 4): emsg = "Fit did not succeed -- " + str(f[3]) raise SrMiseFitError(emsg) # clean up parameters for p in self: - p.pars = p.owner().transform_parameters(p.pars, in_format="internal", out_format="internal") + p.pars = p.owner().transform_parameters( + p.pars, in_format="internal", out_format="internal" + ) # Supply estimated covariance matrix if requested. # The precise relationship between f[1] and estimated covariance matrix is a little unclear from @@ -169,28 +205,28 @@ def fit(self, r, y, y_error, range=None, ntrials=0, cov=None, cov_format="defaul pcov = f[1] fvec = f[2]["fvec"] dof = len(r) - len(freepars) - cov.setcovariance(self, pcov*np.sum(fvec**2)/dof) + cov.setcovariance(self, pcov * np.sum(fvec**2) / dof) try: cov.transform(in_format="internal", out_format=cov_format) except SrMiseUndefinedCovarianceError as e: logger.warn("Covariance not defined. Fit may not have converged.") - return -#### Notes on the fit f -# f[0] = solution -# f[1] = Uses the fjac and ipvt optional outputs to construct an estimate of the jacobian around the solution. -# None if a singular matrix encountered (indicates very flat curvature in some direction). -# This matrix must be multiplied by the residual variance to get the covariance of the parameter -# estimates - see curve fit. -# f[2] = dictionary{nfev: int, fvec: array(), fjac: array(), ipvt: array(), qtf: array()} -# nfev - The number of function calls made -# fvec - function (residual) evaluated at solution -# fjac - "a permutation of the R matrix of a QR factorization of the final Jacobian." -# ipvt - integer array defining a permutation matrix P such that fjac*P=QR -# qtf - transpose(q)*fvec -# f[3] = message about results of fit -# f[4] = integer flag. Fit was successful on 1,2,3, or 4. Otherwise unsuccessful. + + #### Notes on the fit f + # f[0] = solution + # f[1] = Uses the fjac and ipvt optional outputs to construct an estimate of the jacobian around the solution. + # None if a singular matrix encountered (indicates very flat curvature in some direction). + # This matrix must be multiplied by the residual variance to get the covariance of the parameter + # estimates - see curve fit. + # f[2] = dictionary{nfev: int, fvec: array(), fjac: array(), ipvt: array(), qtf: array()} + # nfev - The number of function calls made + # fvec - function (residual) evaluated at solution + # fjac - "a permutation of the R matrix of a QR factorization of the final Jacobian." + # ipvt - integer array defining a permutation matrix P such that fjac*P=QR + # qtf - transpose(q)*fvec + # f[3] = message about results of fit + # f[4] = integer flag. Fit was successful on 1,2,3, or 4. Otherwise unsuccessful. def npars(self, count_fixed=True): """Return total number of parameters in all parts. 
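# [Editor's sketch -- not part of the patch] The "Notes on the fit f" block
# above documents leastsq's full_output tuple and why f[1] must be scaled by
# the residual variance to estimate parameter covariance (the same convention
# scipy's curve_fit uses). A hedged, self-contained rendering of that recipe;
# the names here are illustrative, not srmise API:
import numpy as np
from scipy.optimize import leastsq

def fit_with_covariance(residual, p0, ndata):
    sol, pcov, info, mesg, ier = leastsq(residual, p0, full_output=1)
    if ier not in (1, 2, 3, 4):  # same success flags checked above
        raise RuntimeError("Fit did not succeed -- " + str(mesg))
    if pcov is None:  # singular matrix: very flat curvature in some direction
        return sol, None
    dof = ndata - len(np.atleast_1d(sol))
    return sol, pcov * np.sum(info["fvec"] ** 2) / dof  # scaled covariance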
@@ -201,7 +237,7 @@ def npars(self, count_fixed=True): """ n = 0 for p in self: - n+=p.npars(count_fixed=count_fixed) + n += p.npars(count_fixed=count_fixed) return n def pack_freepars(self, freepars): @@ -229,9 +265,9 @@ def residual(self, freepars, r, y_expected, y_error, range=None): try: if range is None: range = slice(0, len(r)) - return (y_expected[range]-total[range])/y_error[range] + return (y_expected[range] - total[range]) / y_error[range] except TypeError: - return (y_expected-total)/y_error + return (y_expected - total) / y_error def residual_jacobian(self, freepars, r, y_expected, y_error, range=None): """Calculate the Jacobian of freepars. @@ -245,22 +281,24 @@ def residual_jacobian(self, freepars, r, y_expected, y_error, range=None): All the data by default. """ if len(freepars) == 0: - raise ValueError("Argument freepars has length 0. The Jacobian " - "is only defined with >=1 free parameters.") + raise ValueError( + "Argument freepars has length 0. The Jacobian " + "is only defined with >=1 free parameters." + ) self.pack_freepars(freepars) - tempJac=[] + tempJac = [] for p in self: - tempJac[len(tempJac):] = p.jacobian(r, range) + tempJac[len(tempJac) :] = p.jacobian(r, range) # Since the residual is (expected - calculated) the jacobian # of the residual has a minus sign. - jac=-np.array([j for j in tempJac if j is not None]) + jac = -np.array([j for j in tempJac if j is not None]) try: if range is None: range = slice(0, len(r)) - return jac[:,range]/y_error[range] + return jac[:, range] / y_error[range] except TypeError: - return jac/y_error + return jac / y_error def value(self, r, range=None): """Calculate total value of all parts over range. @@ -270,14 +308,14 @@ def value(self, r, range=None): range - Slice object specifying region of r and y over which to fit. All the data by default. """ - total = r * 0. + total = r * 0.0 for p in self: total += p.value(r, range) return total def unpack_freepars(self): """Return array of all free parameters.""" - #To check: ravel() sometimes returns a reference and othertimes a copy. + # To check: ravel() sometimes returns a reference and othertimes a copy. # Do I need to use flatten() instead? return np.concatenate([p.compress() for p in self]).ravel() @@ -302,16 +340,16 @@ def covariance(self, format="internal", **kwds): try: idx = int(k[1:]) except ValueError: - emsg = "Invalid format keyword '%s'. They must be specified as 'f0', 'f1', etc." %k + emsg = ( + "Invalid format keyword '%s'. They must be specified as 'f0', 'f1', etc." + % k + ) raise ValueError(emsg) formats[int(k[1:])] = v - - return - def copy(self): """Return deep copy of this ModelParts. @@ -321,7 +359,7 @@ def copy(self): def __str__(self): """Return string representation of this ModelParts.""" - return ''.join([str(p)+"\n" for p in self]) + return "".join([str(p) + "\n" for p in self]) def __getslice__(self, i, j): """Extends list.__getslice__""" @@ -338,10 +376,15 @@ def transform(self, in_format="internal", out_format="internal"): try: p.pars = p.owner().transform_parameters(p.pars, in_format, out_format) except ValueError: - logger.info("Invalid parameter transformation: Ignoring %s->%s for function of type %s." %(in_format, out_format, p.owner().getmodule())) + logger.info( + "Invalid parameter transformation: Ignoring %s->%s for function of type %s." + % (in_format, out_format, p.owner().getmodule()) + ) + # End of class ModelParts + class ModelPart(object): """Represents a single part (instance of some function) of a model. 
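# [Editor's sketch -- not part of the patch] ModelParts.residual() above
# returns the error-weighted difference (expected - calculated)/y_error,
# which is also why residual_jacobian() carries a minus sign. A tiny numeric
# illustration with made-up values:
import numpy as np

y_expected = np.array([1.0, 2.0, 3.0])
y_total = np.array([1.1, 1.9, 3.2])         # summed value of all model parts
y_error = np.array([0.1, 0.1, 0.2])
residual = (y_expected - y_total) / y_error  # approx. [-1.0, 1.0, -1.0]
chisq = np.sum(residual**2)                  # the quantity leastsq minimizes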
@@ -388,18 +431,22 @@ def __init__(self, owner, pars, free=None, removable=True, static_owner=False): self._owner = owner if len(pars) != owner.npars: - emsg = "The length of pars must equal the number of parameters "+\ - "specified by the model part owner." + emsg = ( + "The length of pars must equal the number of parameters " + + "specified by the model part owner." + ) raise ValueError(emsg) - self.pars = np.array(pars[:]) # pars[:] in case pars is a ModelPart + self.pars = np.array(pars[:]) # pars[:] in case pars is a ModelPart if free is None: self.free = np.array([True for p in pars], dtype=bool) else: self.free = np.array(free, dtype=bool) if len(self.free) != owner.npars: - emsg = "The length of free must be equal to the number of "+\ - "parameters specified by the model part owner." + emsg = ( + "The length of free must be equal to the number of " + + "parameters specified by the model part owner." + ) raise ValueError(emsg) self.removable = removable @@ -419,8 +466,10 @@ def changeowner(self, owner): emsg = "Cannot change owner if static_owner is True." raise SrMiseStaticOwnerError(emsg) if self._owner.npars != owner.npars: - emsg = "New owner specifies different number of parameters than "+\ - "original owner." + emsg = ( + "New owner specifies different number of parameters than " + + "original owner." + ) raise SrMiseStaticOwnerError(emsg) self._owner = owner @@ -453,8 +502,8 @@ def update(self, freepars): """ numfree = self.npars(count_fixed=False) if len(freepars) < numfree: - pass # raise "freepars does not have enough elements to - # update every unheld parameter." + pass # raise "freepars does not have enough elements to + # update every unheld parameter." # TODO: Check if I need to make copies here, or if references # to parameters are safe. self.pars[self.free] = freepars[:numfree] @@ -475,7 +524,9 @@ def copy(self): The original and the copy are completely independent, except they both reference the same owner.""" - return type(self).__call__(self._owner, self.pars, self.free, self.removable, self.static_owner) + return type(self).__call__( + self._owner, self.pars, self.free, self.removable, self.static_owner + ) def __getitem__(self, key_or_idx): """Return parameter of peak corresponding with key_or_idx. @@ -529,20 +580,26 @@ def npars(self, count_fixed=True): def __str__(self): """Return string representation of ModelPart parameters.""" - return str(self._owner.transform_parameters(self.pars, in_format="internal", out_format="default_output")) + return str( + self._owner.transform_parameters( + self.pars, in_format="internal", out_format="default_output" + ) + ) def __eq__(self, other): - """ """ + """ """ if hasattr(other, "_owner"): - return ((self._owner is other._owner) and - np.all(self.pars == other.pars) and - np.all(self.free == other.free) and - self.removable == other.removable) + return ( + (self._owner is other._owner) + and np.all(self.pars == other.pars) + and np.all(self.free == other.free) + and self.removable == other.removable + ) else: return False def __ne__(self, other): - """ """ + """ """ return not self == other def writestr(self, ownerlist): @@ -557,19 +614,20 @@ def writestr(self, ownerlist): emsg = "ownerlist does not contain this ModelPart's owner." 
raise ValueError(emsg) lines = [] - lines.append("owner=%s" %repr(ownerlist.index(self._owner))) - - #Lists/numpy arrays don't give full representation of long lists - lines.append("pars=[%s]" %", ".join([repr(p) for p in self.pars])) - lines.append("free=[%s]" %", ".join([repr(f) for f in self.free])) - lines.append("removable=%s" %repr(self.removable)) - lines.append("static_owner=%s" %repr(self.static_owner)) - datastring = "\n".join(lines)+"\n" + lines.append("owner=%s" % repr(ownerlist.index(self._owner))) + + # Lists/numpy arrays don't give full representation of long lists + lines.append("pars=[%s]" % ", ".join([repr(p) for p in self.pars])) + lines.append("free=[%s]" % ", ".join([repr(f) for f in self.free])) + lines.append("removable=%s" % repr(self.removable)) + lines.append("static_owner=%s" % repr(self.static_owner)) + datastring = "\n".join(lines) + "\n" return datastring + # End of class ModelPart # simple test code -if __name__ == '__main__': +if __name__ == "__main__": pass diff --git a/diffpy/srmise/multimodelselection.py b/diffpy/srmise/multimodelselection.py index 35d9e08..01995e2 100644 --- a/diffpy/srmise/multimodelselection.py +++ b/diffpy/srmise/multimodelselection.py @@ -23,13 +23,15 @@ logger = logging.getLogger("diffpy.srmise") + def eatkwds(*args, **kwds): """Convenience function to remove all keywords in args from kwds.""" for k in args: if k in kwds: - print "Keyword %s=%s ignored." %(k, kwds.pop(k)) + print("Keyword %s=%s ignored." % (k, kwds.pop(k))) return kwds + class MultimodelSelection(PeakStability): """Quick and dirty multimodel selection using AIC and its offspring.""" @@ -52,7 +54,9 @@ def __init__(self): self.classweights = {} self.classprobs = {} self.sortedclassprobs = {} - self.sortedclasses = {} # dg->as self.classes, but with model indices sorted by best AIC + self.sortedclasses = ( + {} + ) # dg->as self.classes, but with model indices sorted by best AIC PeakStability.__init__(self) return @@ -67,7 +71,9 @@ def makeaics(self, dgs, dr, filename=None): nominal value. filename - Optional file to save pickled results """ - aics_out = {} # Version of self.aics that holds only the statistic, not the AIC object. + aics_out = ( + {} + ) # Version of self.aics that holds only the statistic, not the AIC object. self.dgs = np.array(dgs) for i, dg in enumerate(self.dgs): self.dgs_idx[dg] = i @@ -77,12 +83,11 @@ def makeaics(self, dgs, dr, filename=None): (r, y, dr, dy) = self.ppe.resampledata(dr) for model_idx in range(len(self.results)): - print "Testing model %s of %s." %(model_idx, len(self.results)) + print("Testing model %s of %s." % (model_idx, len(self.results))) result = self.results[model_idx] em = self.ppe.error_method - # This section dependent on spaghetti code elsewhere in srmise! # Short cut evaluation of AICs which doesn't require calculating chi-square # over and over again. This method assumes that the various uncertainties @@ -95,13 +100,17 @@ def makeaics(self, dgs, dr, filename=None): # modelevaluators subpackage are in need of a rewrite, and so it would be # best to do them all at once. 
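# [Editor's sketch -- not part of the patch] The shortcut described above
# works because, for a uniform uncertainty dg, chi-square scales exactly as
# (dg0/dg)**2, so a single evaluation at dg0 prices every other dg. Hedged
# illustration with made-up residuals:
import numpy as np

residuals = np.array([0.5, -1.0, 0.25])  # y - model, illustrative values
dg0, dg = 0.01, 0.02
chisq0 = np.sum((residuals / dg0) ** 2)
assert np.isclose(np.sum((residuals / dg) ** 2), chisq0 * (dg0 / dg) ** 2)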
dg0 = self.dgs[0] - mc = ModelCluster(result[1], result[2], r, y, dg0*np.ones(len(r)), None, em, self.ppe.pf) + mc = ModelCluster( + result[1], result[2], r, y, dg0 * np.ones(len(r)), None, em, self.ppe.pf + ) em0 = mc.quality() for dg in self.dgs: em_instance = em() - em_instance.chisq = em0.chisq*(dg0/dg)**2 # rescale chi-square - em_instance.evaluate(mc) # evaluate AIC without recalculating chi-square + em_instance.chisq = em0.chisq * (dg0 / dg) ** 2 # rescale chi-square + em_instance.evaluate( + mc + ) # evaluate AIC without recalculating chi-square self.aics[dg].append(em_instance) aics_out[dg].append(em_instance.stat) @@ -111,10 +120,10 @@ def makeaics(self, dgs, dr, filename=None): if filename is not None: try: - import cPickle as pickle + import cPickle as pickle except: - import pickle - out_s = open(filename, 'wb') + import pickle + out_s = open(filename, "wb") pickle.dump(aics_out, out_s) out_s.close() @@ -123,10 +132,10 @@ def makeaics(self, dgs, dr, filename=None): def loadaics(self, filename): """Load file containing results of the testall method.""" try: - import cPickle as pickle + import cPickle as pickle except: - import pickle - in_s = open(filename, 'rb') + import pickle + in_s = open(filename, "rb") aics_in = pickle.load(in_s) in_s.close() @@ -171,7 +180,7 @@ def makesortedprobs(self): for dg in self.dgs: self.sortedprobs[dg] = np.argsort(self.aicprobs[dg]).tolist() - def animate_probs(self, step=False, duration=0., **kwds): + def animate_probs(self, step=False, duration=0.0, **kwds): """Show animation of extracted peaks from first to last. Parameters: @@ -181,14 +190,17 @@ def animate_probs(self, step=False, duration=0., **kwds): Keywords passed to pyplot.plot()""" if duration > 0: import time - sleeptime = duration/len(self.dgs) + + sleeptime = duration / len(self.dgs) plt.ion() plt.subplot(211) best_idx = self.sortedprobs[self.dgs[0]][-1] - line, = plt.plot(self.dgs, self.aicprobs[self.dgs[0]]) + (line,) = plt.plot(self.dgs, self.aicprobs[self.dgs[0]]) vline = plt.axvline(self.dgs[0]) - dot, = plt.plot(self.dgs[best_idx],self.aicprobs[self.dgs[0]][best_idx],'ro') + (dot,) = plt.plot( + self.dgs[best_idx], self.aicprobs[self.dgs[0]][best_idx], "ro" + ) plt.subplot(212) self.setcurrent(best_idx) @@ -208,11 +220,11 @@ def animate_probs(self, step=False, duration=0., **kwds): plt.ion() plt.draw() if step: - raw_input() + input() if duration > 0: time.sleep(sleeptime) - def animate_classprobs(self, step=False, duration=0., **kwds): + def animate_classprobs(self, step=False, duration=0.0, **kwds): """Show animation of extracted peaks from first to last. 
Parameters: @@ -222,23 +234,31 @@ def animate_classprobs(self, step=False, duration=0., **kwds): Keywords passed to pyplot.plot()""" if duration > 0: import time - sleeptime = duration/len(self.dgs) + + sleeptime = duration / len(self.dgs) plt.ion() ax1 = plt.subplot(211) bestclass_idx = self.sortedclassprobs[self.dgs[0]][-1] best_idx = self.sortedclasses[self.dgs[0]][bestclass_idx][-1] - arrow_left = len(self.classes)-1 - arrow_right = arrow_left + .05*arrow_left - line, = plt.plot(range(len(self.classes)), self.classprobs[self.dgs[0]]) - dot, = plt.plot(self.dgs[best_idx],self.classprobs[self.dgs[0]][bestclass_idx],'ro') - plt.axvline(arrow_left, color='k') + arrow_left = len(self.classes) - 1 + arrow_right = arrow_left + 0.05 * arrow_left + (line,) = plt.plot(range(len(self.classes)), self.classprobs[self.dgs[0]]) + (dot,) = plt.plot( + self.dgs[best_idx], self.classprobs[self.dgs[0]][bestclass_idx], "ro" + ) + plt.axvline(arrow_left, color="k") ax2 = ax1.twinx() - ax2.set_ylim(self.dgs[0],self.dgs[-1]) + ax2.set_ylim(self.dgs[0], self.dgs[-1]) ax2.set_ylabel("dg") ax1.set_xlim(right=arrow_right) ax2.set_xlim(right=arrow_right) - dgarrow = ax2.annotate("",(arrow_right, self.dgs[0]), (arrow_left, self.dgs[0]), arrowprops=dict(arrowstyle="-|>")) + dgarrow = ax2.annotate( + "", + (arrow_right, self.dgs[0]), + (arrow_left, self.dgs[0]), + arrowprops=dict(arrowstyle="-|>"), + ) plt.subplot(212) self.setcurrent(best_idx) @@ -247,7 +267,7 @@ def animate_classprobs(self, step=False, duration=0., **kwds): minval = np.min(val[1::2]) [r, res] = tempcluster.plottable_residual() plt.plot(*val) - plt.plot(r, minval-np.max(res)+res) + plt.plot(r, minval - np.max(res) + res) for dg in self.dgs[1:]: plt.ioff() line.set_ydata(self.classprobs[dg]) @@ -263,13 +283,13 @@ def animate_classprobs(self, step=False, duration=0., **kwds): minval = np.min(val[1::2]) [r, res] = tempcluster.plottable_residual() plt.plot(*val) - plt.plot(r, minval-np.max(res)+res) + plt.plot(r, minval - np.max(res) + res) dot.set_xdata(bestclass_idx) dot.set_ydata(self.classprobs[dg][bestclass_idx]) plt.ion() plt.draw() if step: - raw_input() + input() if duration > 0: time.sleep(sleeptime) @@ -295,16 +315,18 @@ def classify(self, r, tolerance=0.05): self.classes_idx = {} self.class_tolerance = None - classes = [] # each element is a list of the models (result indices) in the class - classes_idx = {} # given an integer corresponding to a model, return its class - epsqval = {} # holds the squared value of each class' exemplar peaks - ebsqval = {} # holds the squared value of each class exemplar baselines + classes = ( + [] + ) # each element is a list of the models (result indices) in the class + classes_idx = {} # given an integer corresponding to a model, return its class + epsqval = {} # holds the squared value of each class' exemplar peaks + ebsqval = {} # holds the squared value of each class exemplar baselines for i in range(len(self.results)): peaks = self.results[i][1] baseline = self.results[i][2] - bsqval = baseline.value(r)**2 - psqval = [p.value(r)**2 for p in peaks] + bsqval = baseline.value(r) ** 2 + psqval = [p.value(r) ** 2 for p in peaks] added_to_class = False for c in range(len(classes)): @@ -318,10 +340,10 @@ def classify(self, r, tolerance=0.05): continue # check peak types and number of parameters - badpeak=False + badpeak = False if len(peaks) != len(exemplar_peaks): continue - for p, ep in zip(peaks,exemplar_peaks): + for p, ep in zip(peaks, exemplar_peaks): if type(p) != type(ep): badpeak = True break @@ 
-333,19 +355,23 @@ def classify(self, r, tolerance=0.05): # check peak values current_psqval = [] - for p, ep in zip(psqval,epsqval[c]): - basediff = np.abs(np.sum(p-ep)) - #if basediff > tolerance*np.sum(ep): - if basediff > tolerance*np.sum(ep) or basediff > tolerance*np.sum(p): + for p, ep in zip(psqval, epsqval[c]): + basediff = np.abs(np.sum(p - ep)) + # if basediff > tolerance*np.sum(ep): + if basediff > tolerance * np.sum( + ep + ) or basediff > tolerance * np.sum(p): badpeak = True break if badpeak: continue # check baseline values - basediff = np.abs(np.sum(bsqval-ebsqval[c])) - #if basediff > tolerance*np.sum(ebsqval[c]): - if basediff > tolerance*np.sum(ebsqval[c]) or basediff > tolerance*np.sum(bsqval): + basediff = np.abs(np.sum(bsqval - ebsqval[c])) + # if basediff > tolerance*np.sum(ebsqval[c]): + if basediff > tolerance * np.sum( + ebsqval[c] + ) or basediff > tolerance * np.sum(bsqval): continue # that's all the checks, add to current class @@ -357,7 +383,7 @@ def classify(self, r, tolerance=0.05): if added_to_class is False: # make a new class with the current model as exemplar classes.append([i]) - classnum = len(classes)-1 + classnum = len(classes) - 1 classes_idx[i] = classnum epsqval[classnum] = psqval ebsqval[classnum] = bsqval @@ -393,7 +419,9 @@ def makeclassweights(self): for dg in self.dgs: bestinclass = [cls[-1] for cls in self.sortedclasses[dg]] - self.classweights[dg] = em.akaikeweights([self.aics[dg][b] for b in bestinclass]) + self.classweights[dg] = em.akaikeweights( + [self.aics[dg][b] for b in bestinclass] + ) def makeclassprobs(self): self.classprobs = {} @@ -401,7 +429,9 @@ def makeclassprobs(self): for dg in self.dgs: bestinclass = [cls[-1] for cls in self.sortedclasses[dg]] - self.classprobs[dg] = em.akaikeprobs([self.aics[dg][b] for b in bestinclass]) + self.classprobs[dg] = em.akaikeprobs( + [self.aics[dg][b] for b in bestinclass] + ) def makesortedclassprobs(self): self.sortedclassprobs = {} @@ -411,7 +441,7 @@ def makesortedclassprobs(self): def dg_key(self, dg_in): """Return the dg value usable as a key nearest to dg_in.""" - idx = (np.abs(self.dgs-dg_in)).argmin() + idx = (np.abs(self.dgs - dg_in)).argmin() return self.dgs[idx] def bestclasses(self, dgs=None): @@ -489,7 +519,7 @@ def plot3dclassprobs(self, **kwds): from mpl_toolkits.mplot3d import Axes3D fig = kwds.pop("figure", plt.gcf()) - ax = fig.add_subplot(kwds.pop("subplot",111), projection='3d') + ax = fig.add_subplot(kwds.pop("subplot", 111), projection="3d") cbkwds = kwds.copy() @@ -497,16 +527,16 @@ def plot3dclassprobs(self, **kwds): dGs = kwds.pop("dGs", self.dgs) highlight = kwds.pop("highlight", []) classes = kwds.pop("classes", range(len(self.classes))) - probfilter = kwds.pop("probfilter", [0.,1.]) + probfilter = kwds.pop("probfilter", [0.0, 1.0]) class_size = kwds.pop("class_size", "number") norm = kwds.pop("norm", "auto") cmap = kwds.pop("cmap", cm.jet) highlight_cmap = kwds.pop("highlight_cmap", cm.gray) title = kwds.pop("title", True) p_alpha = kwds.pop("p_alpha", 0.7) - scale = kwds.pop("scale", 1.) 
+ scale = kwds.pop("scale", 1.0) - xs = dGs*scale + xs = dGs * scale verts = [] zs = [] zlabels = [] @@ -515,22 +545,22 @@ def plot3dclassprobs(self, **kwds): maxys = np.max(ys) if maxys >= probfilter[0] and maxys <= probfilter[1]: - p0, p1 = ((xs[0], 0), (xs[-1],0)) # points to close the vertices - verts.append(np.concatenate([[p0], zip(xs,ys), [p1], [p0]])) + p0, p1 = ((xs[0], 0), (xs[-1], 0)) # points to close the vertices + verts.append(np.concatenate([[p0], zip(xs, ys), [p1], [p0]])) zlabels.append(i) ### Define face colors fc = np.array([len(self.classes[z]) for z in zlabels]) if class_size is "fraction": - fc = fc/float(len(self.results)) + fc = fc / float(len(self.results)) # Index the colormap if necessary if class_size is "number": if norm is "auto": - indexedcolors = cmap(np.linspace(0., 1., np.max(fc))) + indexedcolors = cmap(np.linspace(0.0, 1.0, np.max(fc))) cmap = colors.ListedColormap(indexedcolors) elif norm is "full": - indexedcolors = cmap(np.linspace(0., 1., len(self.results))) + indexedcolors = cmap(np.linspace(0.0, 1.0, len(self.results))) cmap = colors.ListedColormap(indexedcolors) # A user-specified norm cannot be used to index a colormap. @@ -540,45 +570,59 @@ def plot3dclassprobs(self, **kwds): mic = np.min(fc) mac = np.max(fc) nc = mac - mic + 1 - norm = colors.BoundaryNorm(np.linspace(mic, mac+1, nc+1), nc) + norm = colors.BoundaryNorm(np.linspace(mic, mac + 1, nc + 1), nc) if class_size is "fraction": norm = colors.Normalize() norm.autoscale(fc) elif norm is "full": mcolor = len(self.results) if class_size is "number": - norm = colors.BoundaryNorm(np.linspace(0, mcolor+1, mcolor+2), mcolor+1) + norm = colors.BoundaryNorm( + np.linspace(0, mcolor + 1, mcolor + 2), mcolor + 1 + ) if class_size is "fraction": - norm = colors.Normalize(0., 1.) 
+ norm = colors.Normalize(0.0, 1.0) zs = np.arange(len(zlabels)) poly = PolyCollection(verts, facecolors=cmap(norm(fc)), closed=False) poly.set_alpha(p_alpha) - cax = ax.add_collection3d(poly, zs=zs, zdir='y') + cax = ax.add_collection3d(poly, zs=zs, zdir="y") # Highlight values of interest color_idx = np.linspace(0, 1, len(highlight)) for dG, ci in zip(highlight, color_idx): for z_logical, z_plot in zip(zlabels, zs): - ax.plot([dG, dG], [z_plot, z_plot], [0, self.classprobs[dG][z_logical]], color=highlight_cmap(ci), alpha=p_alpha) - - ax.set_xlabel('dG') - ax.set_xlim3d(dGs[0]*scale, dGs[-1]*scale) - ax.set_ylabel('Class') + ax.plot( + [dG, dG], + [z_plot, z_plot], + [0, self.classprobs[dG][z_logical]], + color=highlight_cmap(ci), + alpha=p_alpha, + ) + + ax.set_xlabel("dG") + ax.set_xlim3d(dGs[0] * scale, dGs[-1] * scale) + ax.set_ylabel("Class") ax.set_ylim3d(zs[0], zs[-1]) ax.set_yticks(zs) ax.set_yticklabels([str(z) for z in zlabels]) - ax.set_zlabel('Akaike probability') + ax.set_zlabel("Akaike probability") ax.set_zlim3d(0, 1) if title is True: - title = "Class probabilities\n\ + title = ( + "Class probabilities\n\ Max probabilities in %s\n\ - %i/%i classes with %i/%i models displayed"\ - %(probfilter, - len(zs), len(self.classes), - np.sum([len(self.classes[z]) for z in zlabels]), len(self.results) ) + %i/%i classes with %i/%i models displayed" + % ( + probfilter, + len(zs), + len(self.classes), + np.sum([len(self.classes[z]) for z in zlabels]), + len(self.results), + ) + ) if title is not False: figtitle = fig.suptitle(title) @@ -586,23 +630,26 @@ def plot3dclassprobs(self, **kwds): # Add colorbar if "cbpos" in kwds: cbpos = kwds.pop("cbpos") - aspect = cbpos[3]/cbpos[2] - plt.tight_layout() # do it before cbaxis, so colorbar is ignored. + aspect = cbpos[3] / cbpos[2] + plt.tight_layout() # do it before cbaxis, so colorbar is ignored. transAtoF = ax.transAxes + fig.transFigure.inverted() rect = transforms.Bbox.from_bounds(*cbpos).transformed(transAtoF).bounds cbaxis = fig.add_axes(rect) # Remove all colorbar.make_axes keywords except orientation - kwds = eatkwds("fraction", "pad", "shrink", "aspect", - "anchor", "panchor", **kwds) + kwds = eatkwds( + "fraction", "pad", "shrink", "aspect", "anchor", "panchor", **kwds + ) else: kwds.setdefault("shrink", 0.75) # In matplotlib 1.1.0 make_axes_gridspec ignores anchor and panchor keywords. # Eat these keywords for now. kwds = eatkwds("anchor", "panchor", **kwds) - cbaxis, kwds = colorbar.make_axes_gridspec(ax, **kwds) # gridspec allows tight_layout - plt.tight_layout() # do it after cbaxis, so colorbar isn't ignored + cbaxis, kwds = colorbar.make_axes_gridspec( + ax, **kwds + ) # gridspec allows tight_layout + plt.tight_layout() # do it after cbaxis, so colorbar isn't ignored cb = colorbar.ColorbarBase(cbaxis, cmap=cmap, norm=norm, **kwds) @@ -611,8 +658,7 @@ def plot3dclassprobs(self, **kwds): elif class_size is "fraction": cb.set_label("Fraction of models in class") - - return {"fig":fig, "axis":ax, "cb":cb, "cbaxis": cbaxis} + return {"fig": fig, "axis": ax, "cb": cb, "cbaxis": cbaxis} def get_model(self, dG, **kwds): """Return index of best model of best class at given dG. @@ -625,7 +671,8 @@ def get_model(self, dG, **kwds): corder - Which class to get based on AIC. Ordered from best to worst from 0 (the default). morder - Which model to get based on AIC. Ordered from best to worst from 0 (the default). Returns a model from a class, or from the collection of all models if classes are ignored. 
- cls - Override corder with a specific class index, or None to ignore classes entirely.""" + cls - Override corder with a specific class index, or None to ignore classes entirely. + """ corder = kwds.pop("corder", 0) morder = kwds.pop("morder", 0) if "cls" in kwds: @@ -634,9 +681,9 @@ def get_model(self, dG, **kwds): cls = self.get_class(dG, corder=corder) if cls is None: - return self.sortedprobs[dG][-1-morder] + return self.sortedprobs[dG][-1 - morder] else: - return self.sortedclasses[dG][cls][-1-morder] + return self.sortedclasses[dG][cls][-1 - morder] def get_class(self, dG, **kwds): """Return index of best class at given dG. @@ -646,9 +693,10 @@ def get_class(self, dG, **kwds): Keywords: - corder - Which class to get based on AIC. Ordered from best to worst from 0 (the default).""" + corder - Which class to get based on AIC. Ordered from best to worst from 0 (the default). + """ corder = kwds.pop("corder", 0) - return self.sortedclassprobs[dG][-1-corder] # index of corderth best class + return self.sortedclassprobs[dG][-1 - corder] # index of corderth best class def get_prob(self, dG, **kwds): """Return Akaike probability of best model of best class at given dG. @@ -661,7 +709,8 @@ def get_prob(self, dG, **kwds): corder - Which class to get based on AIC. Ordered from best to worst from 0 (the default). morder - Which model to get based on AIC. Ordered from best to worst from 0 (the default). Returns a model from a class, or from the collection of all models if classes are ignored. - cls - Override corder with a specific class index, or None to ignore classes entirely.""" + cls - Override corder with a specific class index, or None to ignore classes entirely. + """ idx = self.get_model(dG, **kwds) if "cls" in kwds and kwds["cls"] is None: return self.aicprobs[dG][idx] @@ -680,7 +729,8 @@ def get_nfree(self, dG, **kwds): corder - Which class to get based on AIC. Ordered from best to worst from 0 (the default). morder - Which model to get based on AIC. Ordered from best to worst from 0 (the default). Returns a model from a class, or from the collection of all models if classes are ignored. - cls - Override corder with a specific class index, or None to ignore classes entirely.""" + cls - Override corder with a specific class index, or None to ignore classes entirely. + """ idx = self.get_model(dG, **kwds) model = self.results[idx][1] baseline = self.results[idx][2] @@ -697,7 +747,8 @@ def get_aic(self, dG, **kwds): corder - Which class to get based on AIC. Ordered from best to worst from 0 (the default). morder - Which model to get based on AIC. Ordered from best to worst from 0 (the default). Returns a model from a class, or from the collection of all models if classes are ignored. - cls - Override corder with a specific class index, or None to ignore classes entirely.""" + cls - Override corder with a specific class index, or None to ignore classes entirely. + """ idx = self.get_model(dG, **kwds) return self.aics[dG][idx].stat @@ -714,13 +765,16 @@ def get(self, dG, *args, **kwds): corder - Which class to get based on AIC. Ordered from best to worst from 0 (the default). morder - Which model to get based on AIC. Ordered from best to worst from 0 (the default). Returns a model from a class, or from the collection of all models if classes are ignored. 
- cls - Override corder with a specific class index, or None to ignore classes entirely.""" - fdict = {"aic": self.get_aic, - "class": self.get_class, - "dg": lambda x: x, - "model": self.get_model, - "nfree": self.get_nfree, - "prob": self.get_prob} + cls - Override corder with a specific class index, or None to ignore classes entirely. + """ + fdict = { + "aic": self.get_aic, + "class": self.get_class, + "dg": lambda x: x, + "model": self.get_model, + "nfree": self.get_nfree, + "prob": self.get_prob, + } values = [] for a in args: values.append(fdict[a].__call__(dG, **kwds)) diff --git a/diffpy/srmise/pdfdataset.py b/diffpy/srmise/pdfdataset.py index 2bdd9bf..feb64fa 100644 --- a/diffpy/srmise/pdfdataset.py +++ b/diffpy/srmise/pdfdataset.py @@ -31,6 +31,7 @@ class PDFComponent(object): """Common base class.""" + def __init__(self, name): """initialize @@ -38,13 +39,14 @@ def __init__(self, name): """ self.name = name - def close ( self, force = False ): + def close(self, force=False): """close myself force -- if forcibly (no wait) """ pass + class PDFDataSet(PDFComponent): """PDFDataSet is a class for experimental PDF data. @@ -75,9 +77,21 @@ class PDFDataSet(PDFComponent): refinableVars -- set (dict) of refinable variable names. """ - persistentItems = [ 'robs', 'Gobs', 'drobs', 'dGobs', 'stype', 'qmax', - 'qdamp', 'qbroad', 'dscale', 'rmin', 'rmax', 'metadata' ] - refinableVars = dict.fromkeys(('qdamp', 'qbroad', 'dscale')) + persistentItems = [ + "robs", + "Gobs", + "drobs", + "dGobs", + "stype", + "qmax", + "qdamp", + "qbroad", + "dscale", + "rmin", + "rmax", + "metadata", + ] + refinableVars = dict.fromkeys(("qdamp", "qbroad", "dscale")) def __init__(self, name): """Initialize. @@ -94,7 +108,7 @@ def clear(self): self.Gobs = [] self.drobs = [] self.dGobs = [] - self.stype = 'X' + self.stype = "X" # user must specify qmax to get termination ripples self.qmax = 0.0 self.qdamp = 0.001 @@ -149,16 +163,17 @@ def read(self, filename): returns self """ try: - self.readStr(open(filename,'rb').read()) - except PDFDataFormatError, err: + self.readStr(open(filename, "rb").read()) + except PDFDataFormatError as err: basename = os.path.basename(filename) - emsg = ("Could not open '%s' due to unsupported file format " + - "or corrupted data. [%s]") % (basename, err) + emsg = ( + "Could not open '%s' due to unsupported file format " + + "or corrupted data. [%s]" + ) % (basename, err) raise SrMiseFileError(emsg) self.filename = os.path.abspath(filename) return self - def readStr(self, datastring): """read experimental PDF data from a string @@ -168,15 +183,15 @@ def readStr(self, datastring): """ self.clear() # useful regex patterns: - rx = { 'f' : r'[-+]?(\d+(\.\d*)?|\d*\.\d+)([eE][-+]?\d+)?' 
} + rx = {"f": r"[-+]?(\d+(\.\d*)?|\d*\.\d+)([eE][-+]?\d+)?"} # find where does the data start - res = re.search(r'^#+ start data\s*(?:#.*\s+)*', datastring, re.M) + res = re.search(r"^#+ start data\s*(?:#.*\s+)*", datastring, re.M) # start_data is position where the first data line starts if res: start_data = res.end() else: # find line that starts with a floating point number - regexp = r'^\s*%(f)s' % rx + regexp = r"^\s*%(f)s" % rx res = re.search(regexp, datastring, re.M) if res: start_data = res.start() @@ -186,18 +201,18 @@ def readStr(self, datastring): databody = datastring[start_data:].strip() # find where the metadata starts - metadata = '' - res = re.search(r'^#+\ +metadata\b\n', header, re.M) + metadata = "" + res = re.search(r"^#+\ +metadata\b\n", header, re.M) if res: - metadata = header[res.end():] - header = header[:res.start()] + metadata = header[res.end() :] + header = header[: res.start()] # parse header # stype - if re.search('(x-?ray|PDFgetX)', header, re.I): - self.stype = 'X' - elif re.search('(neutron|PDFgetN)', header, re.I): - self.stype = 'N' + if re.search("(x-?ray|PDFgetX)", header, re.I): + self.stype = "X" + elif re.search("(neutron|PDFgetN)", header, re.I): + self.stype = "N" # qmax regexp = r"\bqmax *= *(%(f)s)\b" % rx res = re.search(regexp, header, re.I) @@ -227,12 +242,12 @@ def readStr(self, datastring): regexp = r"\b(?:temp|temperature|T)\ *=\ *(%(f)s)\b" % rx res = re.search(regexp, header) if res: - self.metadata['temperature'] = float(res.groups()[0]) + self.metadata["temperature"] = float(res.groups()[0]) # doping regexp = r"\b(?:x|doping)\ *=\ *(%(f)s)\b" % rx res = re.search(regexp, header) if res: - self.metadata['doping'] = float(res.groups()[0]) + self.metadata["doping"] = float(res.groups()[0]) # parsing gerneral metadata if metadata: @@ -241,12 +256,12 @@ def readStr(self, datastring): res = re.search(regexp, metadata, re.M) if res: self.metadata[res.groups()[0]] = float(res.groups()[1]) - metadata = metadata[res.end():] + metadata = metadata[res.end() :] else: break # read actual data - robs, Gobs, drobs, dGobs - inf_or_nan = re.compile('(?i)^[+-]?(NaN|Inf)\\b') + inf_or_nan = re.compile("(?i)^[+-]?(NaN|Inf)\\b") has_drobs = True has_dGobs = True # raise PDFDataFormatError if something goes wrong @@ -257,15 +272,13 @@ def readStr(self, datastring): self.robs.append(float(v[0])) self.Gobs.append(float(v[1])) # drobs is valid if all values are defined and positive - has_drobs = (has_drobs and - len(v) > 2 and not inf_or_nan.match(v[2])) + has_drobs = has_drobs and len(v) > 2 and not inf_or_nan.match(v[2]) if has_drobs: v2 = float(v[2]) has_drobs = v2 > 0.0 self.drobs.append(v2) # dGobs is valid if all values are defined and positive - has_dGobs = (has_dGobs and - len(v) > 3 and not inf_or_nan.match(v[3])) + has_dGobs = has_dGobs and len(v) > 3 and not inf_or_nan.match(v[3]) if has_dGobs: v3 = float(v[3]) has_dGobs = v3 > 0.0 @@ -274,15 +287,16 @@ def readStr(self, datastring): self.drobs = len(self.robs) * [0.0] if not has_dGobs: self.dGobs = len(self.robs) * [0.0] - except (ValueError, IndexError), err: + except (ValueError, IndexError) as err: raise PDFDataFormatError(err) self.rmin = self.robs[0] self.rmax = self.robs[-1] - if not has_drobs: self.drobs = len(self.robs) * [0.0] - if not has_dGobs: self.dGobs = len(self.robs) * [0.0] + if not has_drobs: + self.drobs = len(self.robs) * [0.0] + if not has_dGobs: + self.dGobs = len(self.robs) * [0.0] return self - def write(self, filename): """Write experimental PDF data to a file. 
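# [Editor's sketch -- not part of the patch] readStr() above pulls header
# fields out with a reusable float regex. A hedged, stand-alone example of
# the same pattern applied to a made-up header:
import re

rx = {"f": r"[-+]?(\d+(\.\d*)?|\d*\.\d+)([eE][-+]?\d+)?"}
header = "stype=X  x-ray scattering\nqmax=25.00\nqdamp=0.004"
res = re.search(r"\bqmax *= *(%(f)s)\b" % rx, header, re.I)
qmax = float(res.groups()[0]) if res else 0.0  # -> 25.0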
@@ -291,7 +305,7 @@ def write(self, filename): No return value. """ bytes = self.writeStr() - f = open(filename, 'w') + f = open(filename, "w") f.write(bytes) f.close() return @@ -303,38 +317,43 @@ def writeStr(self): """ lines = [] # write metadata - lines.extend([ - 'History written: ' + time.ctime(), - 'produced by ' + getuser(), - '##### PDFgui' ]) + lines.extend( + [ + "History written: " + time.ctime(), + "produced by " + getuser(), + "##### PDFgui", + ] + ) # stype - if self.stype == 'X': - lines.append('stype=X x-ray scattering') - elif self.stype == 'N': - lines.append('stype=N neutron scattering') + if self.stype == "X": + lines.append("stype=X x-ray scattering") + elif self.stype == "N": + lines.append("stype=N neutron scattering") # qmax if self.qmax == 0: - qmax_line = 'qmax=0 correction not applied' + qmax_line = "qmax=0 correction not applied" else: - qmax_line = 'qmax=%.2f' % self.qmax + qmax_line = "qmax=%.2f" % self.qmax lines.append(qmax_line) # qdamp - lines.append('qdamp=%g' % self.qdamp) + lines.append("qdamp=%g" % self.qdamp) # qbroad - lines.append('qbroad=%g' % self.qbroad) + lines.append("qbroad=%g" % self.qbroad) # dscale - lines.append('dscale=%g' % self.dscale) + lines.append("dscale=%g" % self.dscale) # metadata if len(self.metadata) > 0: - lines.append('# metadata') + lines.append("# metadata") for k, v in self.metadata.iteritems(): - lines.append( "%s=%s" % (k,v) ) + lines.append("%s=%s" % (k, v)) # write data: - lines.append('##### start data') - lines.append('#L r(A) G(r) d_r d_Gr') + lines.append("##### start data") + lines.append("#L r(A) G(r) d_r d_Gr") for i in range(len(self.robs)): - lines.append('%g %g %g %g' % \ - (self.robs[i], self.Gobs[i], self.drobs[i], self.dGobs[i]) ) + lines.append( + "%g %g %g %g" + % (self.robs[i], self.Gobs[i], self.drobs[i], self.dGobs[i]) + ) # that should be it datastring = "\n".join(lines) + "\n" return datastring @@ -351,48 +370,62 @@ def copy(self, other=None): other.clear() # some attributes can be assigned, e.g., robs, Gobs, drobs, dGobs are # constant so they can be shared between copies. - assign_attributes = ( 'robs', 'Gobs', 'drobs', 'dGobs', 'stype', - 'qmax', 'qdamp', 'qbroad', 'dscale', - 'rmin', 'rmax', 'filename' ) + assign_attributes = ( + "robs", + "Gobs", + "drobs", + "dGobs", + "stype", + "qmax", + "qdamp", + "qbroad", + "dscale", + "rmin", + "rmax", + "filename", + ) # for others we will assign a copy - copy_attributes = ( 'metadata', ) + copy_attributes = ("metadata",) for a in assign_attributes: setattr(other, a, getattr(self, a)) import copy + for a in copy_attributes: setattr(other, a, copy.deepcopy(getattr(self, a))) return other + # End of class PDFDataSet class PDFDataFormatError(Exception): - """Exception class marking failure to proccess PDF data string. 
- """ + """Exception class marking failure to proccess PDF data string.""" + pass # simple test code -if __name__ == '__main__': +if __name__ == "__main__": import sys + filename = sys.argv[1] dataset = PDFDataSet("test") dataset.read(filename) - print "== metadata ==" + print("== metadata ==") for k, v in dataset.metadata.iteritems(): - print k, "=", v - print "== data members ==" + print(k, "=", v) + print("== data members ==") for k, v in dataset.__dict__.iteritems(): - if k in ('metadata', 'robs', 'Gobs', 'drobs', 'dGobs') or k[0] == "_": + if k in ("metadata", "robs", "Gobs", "drobs", "dGobs") or k[0] == "_": continue - print k, "=", v - print "== robs Gobs drobs dGobs ==" + print(k, "=", v) + print("== robs Gobs drobs dGobs ==") for i in range(len(dataset.robs)): - print dataset.robs[i], dataset.Gobs[i], dataset.drobs[i], dataset.dGobs[i] - print "== writeStr() ==" - print dataset.writeStr() - print "== datasetcopy.writeStr() ==" + print(dataset.robs[i], dataset.Gobs[i], dataset.drobs[i], dataset.dGobs[i]) + print("== writeStr() ==") + print(dataset.writeStr()) + print("== datasetcopy.writeStr() ==") datasetcopy = dataset.copy() - print datasetcopy.writeStr() + print(datasetcopy.writeStr()) # End of file