diff --git a/devutils/prep.py b/devutils/prep.py
index c63ce0f..b765f7d 100644
--- a/devutils/prep.py
+++ b/devutils/prep.py
@@ -111,5 +111,3 @@ def rm(directory, filerestr):
 print "==== Scrubbing Endlines ===="
 # All *.srmise and *.pwa files in examples directory.
 scrubeol("../doc/examples/output", r".*(\.srmise|\.pwa)")
-
-
diff --git a/diffpy/srmise/baselines/nanospherical.py b/diffpy/srmise/baselines/nanospherical.py
index 04c1d43..57f7444 100644
--- a/diffpy/srmise/baselines/nanospherical.py
+++ b/diffpy/srmise/baselines/nanospherical.py
@@ -22,7 +22,8 @@

 logger = logging.getLogger("diffpy.srmise")

-class NanoSpherical (BaselineFunction):
+
+class NanoSpherical(BaselineFunction):
     """Methods for evaluation of baseline of spherical nanoparticle of uniform density.

     Allowed formats are
@@ -47,29 +48,31 @@ def __init__(self, Cache=None):
            evaluations.
         """
         # Define parameterdict
-        parameterdict = {'scale':0, 'radius':1}
-        formats = ['internal']
-        default_formats = {'default_input':'internal', 'default_output':'internal'}
+        parameterdict = {"scale": 0, "radius": 1}
+        formats = ["internal"]
+        default_formats = {"default_input": "internal", "default_output": "internal"}
         metadict = {}
-        BaselineFunction.__init__(self, parameterdict, formats, default_formats, metadict, None, Cache)
+        BaselineFunction.__init__(
+            self, parameterdict, formats, default_formats, metadict, None, Cache
+        )

     #### Methods required by BaselineFunction ####

-#    def estimate_parameters(self, r, y):
-#        """Estimate parameters for spherical baseline. (Not implemented!)
-#
-#        Parameters
-#        r - array along r from which to estimate
-#        y - array along y from which to estimate
-#
-#        Returns Numpy array of parameters in the default internal format.
-#        Raises NotImplementedError if estimation is not implemented for this
-#        degree, or SrMiseEstimationError if parameters cannot be estimated for
-#        any other reason.
-#        """
-#        if len(r) != len(y):
-#            emsg = "Arrays r, y must have equal length."
-#            raise ValueError(emsg)
+    # def estimate_parameters(self, r, y):
+    #     """Estimate parameters for spherical baseline. (Not implemented!)
+    #
+    #     Parameters
+    #     r - array along r from which to estimate
+    #     y - array along y from which to estimate
+    #
+    #     Returns Numpy array of parameters in the default internal format.
+    #     Raises NotImplementedError if estimation is not implemented for this
+    #     degree, or SrMiseEstimationError if parameters cannot be estimated for
+    #     any other reason.
+    #     """
+    #     if len(r) != len(y):
+    #         emsg = "Arrays r, y must have equal length."
+    #         raise ValueError(emsg)

     def _jacobianraw(self, pars, r, free):
         """Return the Jacobian of the spherical baseline.
@@ -83,22 +86,26 @@ def _jacobianraw(self, pars, r, free):
               needed. True for evaluation, False for no evaluation.
         """
         if len(pars) != self.npars:
-            emsg = "Argument pars must have "+str(self.npars)+" elements."
+            emsg = "Argument pars must have " + str(self.npars) + " elements."
             raise ValueError(emsg)
         if len(free) != self.npars:
-            emsg = "Argument free must have "+str(self.npars)+" elements."
+            emsg = "Argument free must have " + str(self.npars) + " elements."
             raise ValueError(emsg)
         jacobian = [None for p in range(self.npars)]
         if (free == False).sum() == self.npars:
             return jacobian
         if np.isscalar(r):
-            if r <= 0. or r >= 2.*pars[1]:
-                if free[0]: jacobian[0] = 0.
-                if free[1]: jacobian[1] = 0.
+            if r <= 0.0 or r >= 2.0 * pars[1]:
+                if free[0]:
+                    jacobian[0] = 0.0
+                if free[1]:
+                    jacobian[1] = 0.0
             else:
-                if free[0]: jacobian[0] = self._jacobianrawscale(pars, r)
-                if free[1]: jacobian[1] = self._jacobianrawradius(pars, r)
+                if free[0]:
+                    jacobian[0] = self._jacobianrawscale(pars, r)
+                if free[1]:
+                    jacobian[1] = self._jacobianrawradius(pars, r)
         else:
             s = self._getdomain(pars, r)
             if free[0]:
@@ -120,12 +127,12 @@ def _jacobianrawscale(self, pars, r):
         """
         s = np.abs(pars[0])
         R = np.abs(pars[1])
-        rdivR = r/R
+        rdivR = r / R
         # From abs'(s) in derivative, which is equivalent to sign(s) except at 0 where it
         # is undefined. Since s=0 is equivalent to the absence of a nanoparticle, sign will
         # be fine.
         sign = np.sign(pars[1])
-        return -sign*r*(1-(3./4.)*rdivR+(1./16.)*rdivR**3)
+        return -sign * r * (1 - (3.0 / 4.0) * rdivR + (1.0 / 16.0) * rdivR**3)

     def _jacobianrawradius(self, pars, r):
         """Return partial Jacobian wrt radius without bounds checking.
@@ -141,7 +148,7 @@ def _jacobianrawradius(self, pars, r):
         # From abs'(R) in derivative, which is equivalent to sign(R) except at 0 where it
         # is undefined. Since R=0 is a singularity anyway, sign will be fine.
         sign = np.sign(pars[1])
-        return sign*s*(3*r**2*(r**2-4*R**2))/(16*R**4)
+        return sign * s * (3 * r**2 * (r**2 - 4 * R**2)) / (16 * R**4)

     def _transform_parametersraw(self, pars, in_format, out_format):
         """Convert parameter values from in_format to out_format.
@@ -162,15 +169,17 @@ def _transform_parametersraw(self, pars, in_format, out_format):
             temp[0] = np.abs(temp[0])
             temp[1] = np.abs(temp[1])
         else:
-            raise ValueError("Argument 'in_format' must be one of %s." \
-                  % self.parformats)
+            raise ValueError(
+                "Argument 'in_format' must be one of %s." % self.parformats
+            )

         # Convert to specified output format from "internal" format.
         if out_format == "internal":
             pass
         else:
-            raise ValueError("Argument 'out_format' must be one of %s." \
-                  % self.parformats)
+            raise ValueError(
+                "Argument 'out_format' must be one of %s." % self.parformats
+            )
         return temp

     def _valueraw(self, pars, r):
@@ -185,11 +194,11 @@ def _valueraw(self, pars, r):
             r - sequence or scalar over which pars is evaluated.
         """
         if len(pars) != self.npars:
-            emsg = "Argument pars must have "+str(self.npars)+" elements."
+            emsg = "Argument pars must have " + str(self.npars) + " elements."
             raise ValueError(emsg)
         if np.isscalar(r):
-            if r <= 0. or r >= 2.*pars[1]:
-                return 0.
+            if r <= 0.0 or r >= 2.0 * pars[1]:
+                return 0.0
             else:
                 return self._valueraw2(pars, r)
         else:
@@ -209,38 +218,44 @@ def _valueraw2(self, pars, r):
         """
         s = np.abs(pars[0])
         R = np.abs(pars[1])
-        rdivR = r/R
-        return -s*r*(1-(3./4.)*rdivR+(1./16.)*rdivR**3)
+        rdivR = r / R
+        return -s * r * (1 - (3.0 / 4.0) * rdivR + (1.0 / 16.0) * rdivR**3)

     def _getdomain(self, pars, r):
         """Return slice object for which r > 0 and r < twice the radius"""
-        low = r.searchsorted(0., side='right')
-        high = r.searchsorted(2.*pars[1], side='left')
+        low = r.searchsorted(0.0, side="right")
+        high = r.searchsorted(2.0 * pars[1], side="left")
         return slice(low, high)

     def getmodule(self):
         return __name__

-#end of class NanoSpherical
+
+# end of class NanoSpherical

 # simple test code
-if __name__ == '__main__':
+if __name__ == "__main__":

     f = NanoSpherical()
     r = np.arange(-5, 10)
-    pars = np.array([-1., 7.])
+    pars = np.array([-1.0, 7.0])
     free = np.array([False, True])
-    print "Testing nanoparticle spherical baseline"
-    print "Scale: %f, Radius: %f" %(pars[0], pars[1])
-    print "-----------------------------------------"
+    print("Testing nanoparticle spherical baseline")
+    print("Scale: %f, Radius: %f" % (pars[0], pars[1]))
+    print("-----------------------------------------")
     val = f._valueraw(pars, r)
-    jac = f._jacobianraw(pars, r, free)
-    outjac = [j if j is not None else [None]*len(r) for j in jac]
-    print "r".center(10), "value".center(10), "jac(scale)".center(10), "jac(radius)".center(10)
+    jac = f._jacobianraw(pars, r, free)
+    outjac = [j if j is not None else [None] * len(r) for j in jac]
+    print(
+        "r".center(10),
+        "value".center(10),
+        "jac(scale)".center(10),
+        "jac(radius)".center(10),
+    )
     for tup in zip(r, val, *outjac):
         for t in tup:
             if t is None:
-                print ("%s" %None).ljust(10),
+                print(("%s" % None).ljust(10), end=" ")
             else:
-                print ("% .3g" %t).ljust(10),
-        print
+                print(("% .3g" % t).ljust(10), end=" ")
+        print()
diff --git a/diffpy/srmise/baselines/polynomial.py b/diffpy/srmise/baselines/polynomial.py
index 4d95c4f..a2cd4d5 100644
--- a/diffpy/srmise/baselines/polynomial.py
+++ b/diffpy/srmise/baselines/polynomial.py
@@ -22,7 +22,8 @@

 logger = logging.getLogger("diffpy.srmise")

-class Polynomial (BaselineFunction):
+
+class Polynomial(BaselineFunction):
     """Methods for evaluation and parameter estimation of a polynomial baseline."""

     def __init__(self, degree, Cache=None):
@@ -41,17 +42,19 @@ def __init__(self, degree, Cache=None):
             emsg = "Argument degree must be an integer."
             raise ValueError(emsg)
         if self.degree < 0:
-            self.degree = -1 # interpreted as negative infinity
+            self.degree = -1  # interpreted as negative infinity
         # Define parameterdict
         # e.g. {"a_0":3, "a_1":2, "a_2":1, "a_3":0} if degree is 3.
         parameterdict = {}
-        for d in range(self.degree+1):
-            parameterdict["a_"+str(d)] = self.degree - d
-        formats = ['internal']
-        default_formats = {'default_input':'internal', 'default_output':'internal'}
+        for d in range(self.degree + 1):
+            parameterdict["a_" + str(d)] = self.degree - d
+        formats = ["internal"]
+        default_formats = {"default_input": "internal", "default_output": "internal"}
         metadict = {}
         metadict["degree"] = (degree, repr)
-        BaselineFunction.__init__(self, parameterdict, formats, default_formats, metadict, None, Cache)
+        BaselineFunction.__init__(
+            self, parameterdict, formats, default_formats, metadict, None, Cache
+        )

     #### Methods required by BaselineFunction ####

@@ -81,7 +84,7 @@ def estimate_parameters(self, r, y):
             return np.array([])

         if self.degree == 0:
-            return np.array([0.])
+            return np.array([0.0])

         if self.degree == 1:
             # Estimate degree=1 baseline.
@@ -90,15 +93,16 @@ def estimate_parameters(self, r, y):
             # lies above the baseline.
             # TODO: Make this more sophisticated.
             try:
-                cut = np.max([len(y)/10, 1])
+                cut = np.max([len(y) // 10, 1])
                 cut_idx = y.argsort()[:cut]

                 import numpy.linalg as la
+
                 A = np.array([r[cut_idx]]).T
                 slope = la.lstsq(A, y[cut_idx])[0][0]
-                return np.array([slope, 0.])
-            except Exception, e:
-                emsg = "Error during estimation -- "+str(e)
+                return np.array([slope, 0.0])
+            except Exception as e:
+                emsg = "Error during estimation -- " + str(e)
                 raise
                 raise SrMiseEstimationError(emsg)
@@ -116,10 +120,10 @@ def _jacobianraw(self, pars, r, free):
               needed. True for evaluation, False for no evaluation.
         """
         if len(pars) != self.npars:
-            emsg = "Argument pars must have "+str(self.npars)+" elements."
+            emsg = "Argument pars must have " + str(self.npars) + " elements."
             raise ValueError(emsg)
         if len(free) != self.npars:
-            emsg = "Argument free must have "+str(self.npars)+" elements."
+            emsg = "Argument free must have " + str(self.npars) + " elements."
             raise ValueError(emsg)
         jacobian = [None for p in range(self.npars)]
         if (free == False).sum() == self.npars:
@@ -148,15 +152,17 @@ def _transform_parametersraw(self, pars, in_format, out_format):
         if in_format == "internal":
             pass
         else:
-            raise ValueError("Argument 'in_format' must be one of %s." \
-                  % self.parformats)
+            raise ValueError(
+                "Argument 'in_format' must be one of %s." % self.parformats
+            )

         # Convert to specified output format from "internal" format.
         if out_format == "internal":
             pass
         else:
-            raise ValueError("Argument 'out_format' must be one of %s." \
-                  % self.parformats)
+            raise ValueError(
+                "Argument 'out_format' must be one of %s." % self.parformats
+            )
         return temp

     def _valueraw(self, pars, r):
@@ -171,51 +177,54 @@ def _valueraw(self, pars, r):
            If degree is negative infinity, pars is an empty sequence.
         r: sequence or scalar over which pars is evaluated"""
         if len(pars) != self.npars:
-            emsg = "Argument pars must have "+str(self.npars)+" elements."
+            emsg = "Argument pars must have " + str(self.npars) + " elements."
             raise ValueError(emsg)
         return np.polyval(pars, r)

     def getmodule(self):
         return __name__

-#end of class Polynomial
+
+# end of class Polynomial

 # simple test code
-if __name__ == '__main__':
+if __name__ == "__main__":

     # Test polynomial of degree 3
-    print "Testing degree 3 polynomial"
-    print "---------------------------"
-    f = Polynomial(degree = 3)
+    print("Testing degree 3 polynomial")
+    print("---------------------------")
+    f = Polynomial(degree=3)
     r = np.arange(5)
     pars = np.array([3, 0, 1, 2])
     free = np.array([True, False, True, True])
     val = f._valueraw(pars, r)
-    jac = f._jacobianraw(pars, r, free)
-    print "Value:\n", val
-    print "Jacobian: "
-    for j in jac: print " %s" %j
+    jac = f._jacobianraw(pars, r, free)
+    print("Value:\n", val)
+    print("Jacobian: ")
+    for j in jac:
+        print(" %s" % j)

     # Test polynomial of degree -oo
-    print "\nTesting degree -oo polynomial (== 0)"
-    print "------------------------------------"
-    f = Polynomial(degree = -1)
+    print("\nTesting degree -oo polynomial (== 0)")
+    print("------------------------------------")
+    f = Polynomial(degree=-1)
     r = np.arange(5)
     pars = np.array([])
     free = np.array([])
     val = f._valueraw(pars, r)
-    jac = f._jacobianraw(pars, r, free)
-    print "Value:\n", val
-    print "Jacobian: "
-    for j in jac: print " %s" %j
+    jac = f._jacobianraw(pars, r, free)
+    print("Value:\n", val)
+    print("Jacobian: ")
+    for j in jac:
+        print(" %s" % j)

     # Test linear estimation
-    print "\nTesting linear baseline estimation"
-    print "------------------------------------"
-    f = Polynomial(degree = 1)
+    print("\nTesting linear baseline estimation")
+    print("------------------------------------")
+    f = Polynomial(degree=1)
     pars = np.array([1, 0])
-    r = np.arange(0, 10, .1)
-    y = -r + 10*np.exp(-(r-5)**2) + np.random.rand(len(r))
+    r = np.arange(0, 10, 0.1)
+    y = -r + 10 * np.exp(-((r - 5) ** 2)) + np.random.rand(len(r))
     est = f.estimate_parameters(r, y)
-    print "Actual baseline: ", np.array([-1, 0.])
-    print "Estimated baseline: ", est
+    print("Actual baseline: ", np.array([-1, 0.0]))
+    print("Estimated baseline: ", est)
diff --git a/diffpy/srmise/modelevaluators/aic.py b/diffpy/srmise/modelevaluators/aic.py
index 915cace..1bb8f9a 100644
--- a/diffpy/srmise/modelevaluators/aic.py
+++ b/diffpy/srmise/modelevaluators/aic.py
@@ -21,7 +21,8 @@

 logger = logging.getLogger("diffpy.srmise")

-class AIC (ModelEvaluator):
+
+class AIC(ModelEvaluator):
     """Evaluate and compare models with the AIC statistic.

     Akaike's Information Criterion (AIC) is a method for comparing statistical
@@ -52,11 +53,11 @@ def __init__(self):
     def evaluate(self, fit, count_fixed=False, kshift=0):
         """Return quality of fit for given ModelCluster using AIC (Akaike's Information Criterion).

-           Parameters
-           fit: A ModelCluster
-           count_fixed: Whether fixed parameters are considered.
-           kshift: (0) Treat the model has having this many additional
-                   parameters. Negative values also allowed."""
+        Parameters
+        fit: A ModelCluster
+        count_fixed: Whether fixed parameters are considered.
+        kshift: (0) Treat the model as having this many additional
+                parameters. Negative values also allowed."""
         # Number of parameters. By default, fixed parameters are ignored.
         k = fit.model.npars(count_fixed=count_fixed) + kshift
         if k < 0:
@@ -77,7 +78,6 @@ def evaluate(self, fit, count_fixed=False, kshift=0):

         return self.stat

-
     def minpoints(self, npars):
         """Calculates the minimum number of points required to make an estimate of a model's quality."""
@@ -86,36 +86,39 @@ def minpoints(self, npars):

     def parpenalty(self, k, n):
         """Returns the cost for adding k parameters to the current model cluster."""

-        #Weight the penalty for additional parameters.
-        #If this isn't 1 there had better be a good reason.
-        fudgefactor = 1.
+        # Weight the penalty for additional parameters.
+        # If this isn't 1 there had better be a good reason.
+        fudgefactor = 1.0

-        return (2*k)*fudgefactor
+        return (2 * k) * fudgefactor

     def growth_justified(self, fit, k_prime):
         """Returns whether adding k_prime parameters to the given model (ModelCluster) is justified given the current quality of the fit.

-        The assumption is that adding k_prime parameters will result in "effectively 0" chiSquared cost, and so adding it is justified
-        if the cost of adding these parameters is less than the current chiSquared cost. The validity of this assumption (which
-        depends on an unknown chiSquared value) and the impact of the errors used should be examined more thoroughly in the future."""
+        The assumption is that adding k_prime parameters will result in "effectively 0" chiSquared cost, and so adding it is justified
+        if the cost of adding these parameters is less than the current chiSquared cost. The validity of this assumption (which
+        depends on an unknown chiSquared value) and the impact of the errors used should be examined more thoroughly in the future.
+        """

         if self.chisq is None:
             self.chisq = self.chi_squared(fit.value(), fit.y_cluster, fit.error_cluster)

-        k_actual = fit.model.npars(count_fixed=False) #parameters in current fit
-        k_test = k_actual + k_prime #parameters in prospective fit
-        n = fit.size #the number of data points included in the fit
+        k_actual = fit.model.npars(count_fixed=False)  # parameters in current fit
+        k_test = k_actual + k_prime  # parameters in prospective fit
+        n = fit.size  # the number of data points included in the fit

         # If there are too few points to calculate AIC with the requested number of parameter
         # then clearly that increase in parameters is not justified.
         if n < self.minpoints(k_test):
             return False

-        #assert n >= self.minPoints(kActual) #check that AIC is defined for the actual fit
+        # assert n >= self.minPoints(kActual) #check that AIC is defined for the actual fit
         if n < self.minpoints(k_actual):
-            logger.warn("AIC.growth_justified(): too few data to evaluate quality reliably.")
-            n=self.minpoints(k_actual)
+            logger.warn(
+                "AIC.growth_justified(): too few data to evaluate quality reliably."
+            )
+            n = self.minpoints(k_actual)

-        penalty=self.parpenalty(k_test, n) - self.parpenalty(k_actual, n)
+        penalty = self.parpenalty(k_test, n) - self.parpenalty(k_actual, n)

         return penalty < self.chisq
@@ -125,24 +128,25 @@ def akaikeweights(aics):
         aic_stats = np.array([aic.stat for aic in aics])
         aic_min = min(aic_stats)

-        return np.exp(-(aic_stats-aic_min)/2.)
+        return np.exp(-(aic_stats - aic_min) / 2.0)

     @staticmethod
     def akaikeprobs(aics):
         """Return sequence of Akaike probabilities for sequence of AICs"""
         aic_weights = AIC.akaikeweights(aics)
-        return aic_weights/np.sum(aic_weights)
+        return aic_weights / np.sum(aic_weights)
+

 # end of class AIC

 # simple test code
-if __name__ == '__main__':
+if __name__ == "__main__":

-    m1=AIC()
-    m2=AIC()
+    m1 = AIC()
+    m2 = AIC()

     m1.stat = 20
     m2.stat = 30

-    print m2 > m1
+    print(m2 > m1)
diff --git a/diffpy/srmise/modelevaluators/aicc.py b/diffpy/srmise/modelevaluators/aicc.py
index 57f0713..2e17e76 100644
--- a/diffpy/srmise/modelevaluators/aicc.py
+++ b/diffpy/srmise/modelevaluators/aicc.py
@@ -21,7 +21,8 @@

 logger = logging.getLogger("diffpy.srmise")

-class AICc (ModelEvaluator):
+
+class AICc(ModelEvaluator):
     """Evaluate and compare models with the AICc statistic.

     Akaike's Information Criterion w/ 2nd order correction for small sample
@@ -50,13 +51,13 @@ def __init__(self):
     def evaluate(self, fit, count_fixed=False, kshift=0):
         """Return quality of fit for given ModelCluster using AICc (Akaike's Information Criterion
-           with 2nd order correction for small sample size).
+        with 2nd order correction for small sample size).

-           Parameters
-           fit: A ModelCluster
-           count_fixed: Whether fixed parameters are considered.
-           kshift: (0) Treat the model has having this many additional
-                   parameters. Negative values also allowed."""
+        Parameters
+        fit: A ModelCluster
+        count_fixed: Whether fixed parameters are considered.
+        kshift: (0) Treat the model as having this many additional
+                parameters. Negative values also allowed."""
         # Number of parameters. By default, fixed parameters are ignored.
         k = fit.model.npars(count_fixed=count_fixed) + kshift
         if k < 0:
@@ -77,7 +78,6 @@ def evaluate(self, fit, count_fixed=False, kshift=0):

         return self.stat

-
     def minpoints(self, npars):
         """Calculates the minimum number of points required to make an estimate of a model's quality."""
@@ -88,36 +88,39 @@ def minpoints(self, npars):

     def parpenalty(self, k, n):
         """Returns the cost for adding k parameters to the current model cluster."""

-        #Weight the penalty for additional parameters.
-        #If this isn't 1 there had better be a good reason.
-        fudgefactor = 1.
+        # Weight the penalty for additional parameters.
+        # If this isn't 1 there had better be a good reason.
+        fudgefactor = 1.0

-        return (2*k+float(2*k*(k+1))/(n-k-1))*fudgefactor
+        return (2 * k + float(2 * k * (k + 1)) / (n - k - 1)) * fudgefactor

     def growth_justified(self, fit, k_prime):
         """Returns whether adding k_prime parameters to the given model (ModelCluster) is justified given the current quality of the fit.

-        The assumption is that adding k_prime parameters will result in "effectively 0" chiSquared cost, and so adding it is justified
-        if the cost of adding these parameters is less than the current chiSquared cost. The validity of this assumption (which
-        depends on an unknown chiSquared value) and the impact of the errors used should be examined more thoroughly in the future."""
+        The assumption is that adding k_prime parameters will result in "effectively 0" chiSquared cost, and so adding it is justified
+        if the cost of adding these parameters is less than the current chiSquared cost. The validity of this assumption (which
+        depends on an unknown chiSquared value) and the impact of the errors used should be examined more thoroughly in the future.
+ """ if self.chisq is None: self.chisq = self.chi_squared(fit.value(), fit.y_cluster, fit.error_cluster) - k_actual = fit.model.npars(count_fixed=False) #parameters in current fit - k_test = k_actual + k_prime #parameters in prospective fit - n = fit.size #the number of data points included in the fit + k_actual = fit.model.npars(count_fixed=False) # parameters in current fit + k_test = k_actual + k_prime # parameters in prospective fit + n = fit.size # the number of data points included in the fit # If there are too few points to calculate AICc with the requested number of parameter # then clearly that increase in parameters is not justified. if n < self.minpoints(k_test): return False - #assert n >= self.minPoints(kActual) #check that AICc is defined for the actual fit + # assert n >= self.minPoints(kActual) #check that AICc is defined for the actual fit if n < self.minpoints(k_actual): - logger.warn("AICc.growth_justified(): too few data to evaluate quality reliably.") - n=self.minpoints(k_actual) + logger.warn( + "AICc.growth_justified(): too few data to evaluate quality reliably." + ) + n = self.minpoints(k_actual) - penalty=self.parpenalty(k_test, n) - self.parpenalty(k_actual, n) + penalty = self.parpenalty(k_test, n) - self.parpenalty(k_actual, n) return penalty < self.chisq @@ -127,24 +130,25 @@ def akaikeweights(aics): aic_stats = np.array([aic.stat for aic in aics]) aic_min = min(aic_stats) - return np.exp(-(aic_stats-aic_min)/2.) + return np.exp(-(aic_stats - aic_min) / 2.0) @staticmethod def akaikeprobs(aics): """Return sequence of Akaike probabilities for sequence of AICs""" aic_weights = AICc.akaikeweights(aics) - return aic_weights/np.sum(aic_weights) + return aic_weights / np.sum(aic_weights) + # end of class AICc # simple test code -if __name__ == '__main__': +if __name__ == "__main__": - m1=AICc() - m2=AICc() + m1 = AICc() + m2 = AICc() m1.stat = 20 m2.stat = 30 - print m2 > m1 + print(m2 > m1)