From 19fd41f703fa427a3588f78cfb55c5ee70eced8a Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sun, 28 Jul 2024 16:17:14 +0000 Subject: [PATCH] [pre-commit.ci] auto fixes from pre-commit hooks --- diffpy/srmise/applications/extract.py | 773 ++++++++++++++++---------- diffpy/srmise/applications/plot.py | 432 ++++++++------ diffpy/srmise/basefunction.py | 137 +++-- diffpy/srmise/dataclusters.py | 158 ++++-- diffpy/srmise/modelcluster.py | 547 ++++++++++-------- diffpy/srmise/modelparts.py | 232 +++++--- diffpy/srmise/multimodelselection.py | 244 ++++---- diffpy/srmise/pdfdataset.py | 151 +++-- diffpy/srmise/peakextraction.py | 604 ++++++++++++-------- diffpy/srmise/peakstability.py | 62 ++- diffpy/srmise/srmiselog.py | 79 ++- 11 files changed, 2095 insertions(+), 1324 deletions(-) diff --git a/diffpy/srmise/applications/extract.py b/diffpy/srmise/applications/extract.py index bc50935..59ca6b2 100755 --- a/diffpy/srmise/applications/extract.py +++ b/diffpy/srmise/applications/extract.py @@ -20,254 +20,409 @@ def main(): """Default SrMise entry-point.""" - usage = ("usage: %prog pdf_file [options]\n" - "pdf_file is a file containing a PDF (accepts several " - "common formats), or a .srmise file.") + usage = ( + "usage: %prog pdf_file [options]\n" + "pdf_file is a file containing a PDF (accepts several " + "common formats), or a .srmise file." + ) from diffpy.srmise import __version__ - version = "diffpy.srmise "+__version__ - - descr = ("The SrMise package is a tool to aid extracting and fitting peaks " - "that comprise a pair distribution function. This script exposes " - "basic peak extraction functionality. For many PDFs it is " - "sufficient to specify the range, baseline, and sometimes an ad " - "hoc uncertainty. See the discussion of these options below for " - "further guidance.") - - epilog = ("Options set above override those from an existing .srmise " - "file, as well as the usual defaults summarized here.\n\n" - "Defaults (when qmax > 0)\n" - "------------------------\n" - "baseline - None (identically 0).\n" - "dg - The uncertainty reported in the PDF (if any), otherwise " - "5% of maximum value of PDF.\n" - "nyquist - True\n" - "range - All the data\n" - "cres - The Nyquist rate.\n" - "supersample - 4.0\n" - "scale - (Deprecated) False\n\n" - "Defaults (when qmax = 0)\n" - "------------------------\n" - "baseline - as above\n" - "dg - as above\n" - "nyquist - False (and no effect if True)\n" - "range - as above\n" - "cres - Four times the average distance between data points\n" - "supersample - Parameter has no effect.\n" - "scale - (Deprecated) False, and no effect if True\n\n" - "Known issues\n" - "------------\n" - "1) Peak extraction works best when the data are moderately " - "oversampled first. When qmax > 0 this is handled " - "automatically, but when qmax = 0 no resampling of any kind is " - "performed.\n" - "2) Peak extraction performed on a PDF file and a .srmise file " - "derived from that data with identical extraction parameters " - "can give different results even on the same platform. This is " - "because the original data may undergo some processing before it " - "can be saved by SrMise. 
For consistent results, always specify " - "the original PDF, or always load the PDF from a .srmise file " - "you save before performing any peak extraction on that data.\n" - "3) Liveplotting depends on the matplotlib backend, and doesn't " - "implement an idle handler, so interaction with its window will " - "likely cause a freeze.") + + version = "diffpy.srmise " + __version__ + + descr = ( + "The SrMise package is a tool to aid extracting and fitting peaks " + "that comprise a pair distribution function. This script exposes " + "basic peak extraction functionality. For many PDFs it is " + "sufficient to specify the range, baseline, and sometimes an ad " + "hoc uncertainty. See the discussion of these options below for " + "further guidance." + ) + + epilog = ( + "Options set above override those from an existing .srmise " + "file, as well as the usual defaults summarized here.\n\n" + "Defaults (when qmax > 0)\n" + "------------------------\n" + "baseline - None (identically 0).\n" + "dg - The uncertainty reported in the PDF (if any), otherwise " + "5% of maximum value of PDF.\n" + "nyquist - True\n" + "range - All the data\n" + "cres - The Nyquist rate.\n" + "supersample - 4.0\n" + "scale - (Deprecated) False\n\n" + "Defaults (when qmax = 0)\n" + "------------------------\n" + "baseline - as above\n" + "dg - as above\n" + "nyquist - False (and no effect if True)\n" + "range - as above\n" + "cres - Four times the average distance between data points\n" + "supersample - Parameter has no effect.\n" + "scale - (Deprecated) False, and no effect if True\n\n" + "Known issues\n" + "------------\n" + "1) Peak extraction works best when the data are moderately " + "oversampled first. When qmax > 0 this is handled " + "automatically, but when qmax = 0 no resampling of any kind is " + "performed.\n" + "2) Peak extraction performed on a PDF file and a .srmise file " + "derived from that data with identical extraction parameters " + "can give different results even on the same platform. This is " + "because the original data may undergo some processing before it " + "can be saved by SrMise. For consistent results, always specify " + "the original PDF, or always load the PDF from a .srmise file " + "you save before performing any peak extraction on that data.\n" + "3) Liveplotting depends on the matplotlib backend, and doesn't " + "implement an idle handler, so interaction with its window will " + "likely cause a freeze." + ) # TODO: Move to argparse (though not in 2.6 by default) to handle # variable-length options without callbacks. Longterm, the major # value is using the same option to specify a baseline that should # use estimation vs. one that should use explicitly provided pars. 
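For reference, a minimal sketch of what that argparse migration could look like for a few of the options defined below (hypothetical code, not part of this patch; option names and semantics are assumed from the optparse definitions that follow):

import argparse

# Hypothetical argparse equivalents of several optparse options below;
# assumed semantics only, this is not code from diffpy.srmise.
parser = argparse.ArgumentParser(description="SrMise peak extraction")
parser.add_argument("pdf_file", help="PDF data file or .srmise file")
parser.add_argument("--extract", dest="performextraction", action="store_true",
                    default=True, help="[Default] Perform extraction.")
parser.add_argument("--no-extract", dest="performextraction",
                    action="store_false", help="Do not perform extraction.")
parser.add_argument("--range", dest="rng", nargs=2, type=float,
                    metavar=("rmin", "rmax"),
                    help="Extract over the range (rmin, rmax).")
args = parser.parse_args()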
- parser = OptionParser(usage=usage, description=descr, epilog=epilog, - version=version, - formatter=IndentedHelpFormatterWithNL()) - - parser.set_defaults(plot=False, liveplot=False, wait=False, - performextraction=True, verbosity="warning") - dg_defaults = {'absolute':None, 'data':None, 'max-fraction':.05, - 'ptp-fraction':.05, 'dG-fraction':1.} - - parser.add_option("--extract", action="store_true", - dest="performextraction", - help="[Default] Perform extraction.") - parser.add_option("--no-extract", action="store_false", - dest="performextraction", - help="Do not perform extraction.") - parser.add_option("--range", nargs=2, dest="rng", type="float", - metavar="rmin rmax", - help="Extract over the range (rmin, rmax).") - parser.add_option("--qmax", dest="qmax", type="string", metavar="QMAX", - help="Model peaks with this maximum q value.") - parser.add_option("--nyquist", action="store_true", dest="nyquist", - help="Use Nyquist resampling if qmax > 0.") - parser.add_option("--no-nyquist", action="store_false", dest="nyquist", - help="Do not use Nyquist resampling.") - parser.add_option("--pf", dest="peakfunction", metavar="PF", - help="Fit peak function PF defined in " - "diffpy.srmise.peaks, e.g. " - "'GaussianOverR(maxwidth=0.7)'") - parser.add_option("--cres", dest="cres", type="float", metavar="cres", - help="Clustering resolution.") - parser.add_option("--supersample", dest="supersample", type="float", - metavar="SS", - help="Minimum initial oversampling rate as multiple of " - "Nyquist rate.") - parser.add_option("--me", "-m", dest="modelevaluator", metavar="ME", - help="ModelEvaluator defined in " - "diffpy.srmise.modelevaluators, e.g. 'AIC'") - - group = OptionGroup(parser, "Baseline Options", - "SrMise cannot determine the appropriate type of " - "baseline (e.g. crystalline vs. some nanoparticle) " - "solely from the data, so the user should specify the " - "appropriate type and/or parameters. (Default is " - "identically 0, which is unphysical.) SrMise keeps the " - "PDF baseline fixed at its initial value until the " - "final stages of peak extraction, so results are " - "frequently conditioned on that choice. (See the " - "SrMise documentation for details.) A good estimate " - "is therefore important for best results. SrMise can " - "estimate initial parameters from the data for linear " - "baselines in some situations (all peaks are positive, " - "and the degree of overlap in the region of extraction " - "is not too great), but in most cases it is best to " - "provide reasonable initial parameters. Run 'srmise " - "pdf_file.gr [baseline_option] --no-extract --plot' " - "for different values of the parameters for rapid " - "visual estimation.") - group.add_option("--baseline", dest="baseline", metavar="BL", - help="Estimate baseline from baseline function BL " - "defined in diffpy.srmise.baselines, e.g. " - "'Polynomial(degree=1)'. All parameters are free. " - "(Many POSIX shells attempt to interpret the " - "parentheses, and on these shells the option should " - "be surrounded by quotation marks.)" ) - group.add_option("--bcrystal", dest="bcrystal", type="string", - metavar="rho0[c]", - help="Use linear baseline defined by crystal number " - "density rho0. Append 'c' to make parameter " - "constant. 
Equivalent to " - "'--bpoly1 -4*pi*rho0[c] 0c'.") - group.add_option("--bsrmise", dest="bsrmise", type="string", metavar="file", - help="Use baseline from specified .srmise file.") - group.add_option("--bpoly0", dest="bpoly0", type="string", metavar="a0[c]", - help="Use constant baseline given by y=a0. " - "Append 'c' to make parameter constant.") - group.add_option("--bpoly1", dest="bpoly1", type="string", nargs=2, - metavar="a1[c] a0[c]", - help="Use baseline given by y=a1*x + a0. Append 'c' to " - "make parameter constant.") - group.add_option("--bpoly2", dest="bpoly2", type="string", nargs=3, - metavar="a2[c] a1[c] a0[c]", - help="Use baseline given by y=a2*x^2+a1*x + a0. Append " - "'c' to make parameter constant.") - group.add_option("--bseq", dest="bseq", type="string", metavar="FILE", - help="Use baseline interpolated from x,y values in FILE. " - "This baseline has no free parameters.") - group.add_option("--bspherical", dest="bspherical", type="string", nargs=2, - metavar="s[c] r[c]", - help="Use spherical nanoparticle baseline with scale s " - "and radius r. Append 'c' to make parameter " - "constant.") + parser = OptionParser( + usage=usage, + description=descr, + epilog=epilog, + version=version, + formatter=IndentedHelpFormatterWithNL(), + ) + + parser.set_defaults( + plot=False, + liveplot=False, + wait=False, + performextraction=True, + verbosity="warning", + ) + dg_defaults = { + "absolute": None, + "data": None, + "max-fraction": 0.05, + "ptp-fraction": 0.05, + "dG-fraction": 1.0, + } + + parser.add_option( + "--extract", + action="store_true", + dest="performextraction", + help="[Default] Perform extraction.", + ) + parser.add_option( + "--no-extract", + action="store_false", + dest="performextraction", + help="Do not perform extraction.", + ) + parser.add_option( + "--range", + nargs=2, + dest="rng", + type="float", + metavar="rmin rmax", + help="Extract over the range (rmin, rmax).", + ) + parser.add_option( + "--qmax", + dest="qmax", + type="string", + metavar="QMAX", + help="Model peaks with this maximum q value.", + ) + parser.add_option( + "--nyquist", + action="store_true", + dest="nyquist", + help="Use Nyquist resampling if qmax > 0.", + ) + parser.add_option( + "--no-nyquist", + action="store_false", + dest="nyquist", + help="Do not use Nyquist resampling.", + ) + parser.add_option( + "--pf", + dest="peakfunction", + metavar="PF", + help="Fit peak function PF defined in " + "diffpy.srmise.peaks, e.g. " + "'GaussianOverR(maxwidth=0.7)'", + ) + parser.add_option( + "--cres", + dest="cres", + type="float", + metavar="cres", + help="Clustering resolution.", + ) + parser.add_option( + "--supersample", + dest="supersample", + type="float", + metavar="SS", + help="Minimum initial oversampling rate as multiple of " "Nyquist rate.", + ) + parser.add_option( + "--me", + "-m", + dest="modelevaluator", + metavar="ME", + help="ModelEvaluator defined in " "diffpy.srmise.modelevaluators, e.g. 'AIC'", + ) + + group = OptionGroup( + parser, + "Baseline Options", + "SrMise cannot determine the appropriate type of " + "baseline (e.g. crystalline vs. some nanoparticle) " + "solely from the data, so the user should specify the " + "appropriate type and/or parameters. (Default is " + "identically 0, which is unphysical.) SrMise keeps the " + "PDF baseline fixed at its initial value until the " + "final stages of peak extraction, so results are " + "frequently conditioned on that choice. (See the " + "SrMise documentation for details.) 
A good estimate " + "is therefore important for best results. SrMise can " + "estimate initial parameters from the data for linear " + "baselines in some situations (all peaks are positive, " + "and the degree of overlap in the region of extraction " + "is not too great), but in most cases it is best to " + "provide reasonable initial parameters. Run 'srmise " + "pdf_file.gr [baseline_option] --no-extract --plot' " + "for different values of the parameters for rapid " + "visual estimation.", + ) + group.add_option( + "--baseline", + dest="baseline", + metavar="BL", + help="Estimate baseline from baseline function BL " + "defined in diffpy.srmise.baselines, e.g. " + "'Polynomial(degree=1)'. All parameters are free. " + "(Many POSIX shells attempt to interpret the " + "parentheses, and on these shells the option should " + "be surrounded by quotation marks.)", + ) + group.add_option( + "--bcrystal", + dest="bcrystal", + type="string", + metavar="rho0[c]", + help="Use linear baseline defined by crystal number " + "density rho0. Append 'c' to make parameter " + "constant. Equivalent to " + "'--bpoly1 -4*pi*rho0[c] 0c'.", + ) + group.add_option( + "--bsrmise", + dest="bsrmise", + type="string", + metavar="file", + help="Use baseline from specified .srmise file.", + ) + group.add_option( + "--bpoly0", + dest="bpoly0", + type="string", + metavar="a0[c]", + help="Use constant baseline given by y=a0. " + "Append 'c' to make parameter constant.", + ) + group.add_option( + "--bpoly1", + dest="bpoly1", + type="string", + nargs=2, + metavar="a1[c] a0[c]", + help="Use baseline given by y=a1*x + a0. Append 'c' to " + "make parameter constant.", + ) + group.add_option( + "--bpoly2", + dest="bpoly2", + type="string", + nargs=3, + metavar="a2[c] a1[c] a0[c]", + help="Use baseline given by y=a2*x^2+a1*x + a0. Append " + "'c' to make parameter constant.", + ) + group.add_option( + "--bseq", + dest="bseq", + type="string", + metavar="FILE", + help="Use baseline interpolated from x,y values in FILE. " + "This baseline has no free parameters.", + ) + group.add_option( + "--bspherical", + dest="bspherical", + type="string", + nargs=2, + metavar="s[c] r[c]", + help="Use spherical nanoparticle baseline with scale s " + "and radius r. Append 'c' to make parameter " + "constant.", + ) parser.add_option_group(group) - - group = OptionGroup(parser, "Uncertainty Options", - "Ideally a PDF reports the accurate experimentally " - "determined uncertainty. In practice, many PDFs " - "report none, while for others the reported values " - "are not necessarily reliable. (If in doubt, ask your " - "friendly neighborhood diffraction expert!) Even when " - "uncertainties are accurate, it can be " - "pragmatically useful to see how the results of " - "peak extraction change when assuming a different " - "value. Nevertheless, the primary determinant of " - "model complexity in SrMise is the uncertainty, so an " - "ad hoc uncertainty yields ad hoc model complexity. " - "See the SrMise documentation for further discussion, " - "including methods to mitigate this issue with " - "multimodel selection.") - group.add_option("--dg-mode", dest="dg_mode", type="choice", - choices=['absolute', 'data', 'max-fraction', - 'ptp-fraction', 'dG-fraction'], - help="Define how values passed to '--dg' are treated. 
" - "Possible values are: \n" - "'absolute' - The actual uncertainty in the PDF.\n" - "'max-fraction' - Fraction of max value in PDF.\n" - "'ptp-fraction' - Fraction of max minus min value " - "in the PDF.\n" - "'dG-fraction' - Fraction of dG reported by PDF.\n" - "If '--dg' is specified but mode is not, then mode " - "ia absolute. Otherwise, 'dG-fraction' is default " - "if the PDF reports uncertaintes, and 'max-fraction' " - "ia default if it does not.") - group.add_option("--dg", dest="dg", type="float", - help="Perform extraction assuming uncertainty dg. " - "Defaults depend on --dg-mode as follows:\n" - "'absolute'=%s\n" - "'max-fraction'=%s\n" - "'ptp-fraction'=%s\n" - "'dG-fraction'=%s" %(dg_defaults['absolute'], - dg_defaults['max-fraction'], - dg_defaults['ptp-fraction'], - dg_defaults['dG-fraction'])) -# group.add_option("--multimodel", nargs=3, dest="multimodel", type="float", -# metavar="dg_min dg_max n", -# help="Generate n models from dg_min to dg_max (given by " -# "--dg-mode) and perform multimodel analysis. " -# "This overrides any value given for --dg") + group = OptionGroup( + parser, + "Uncertainty Options", + "Ideally a PDF reports the accurate experimentally " + "determined uncertainty. In practice, many PDFs " + "report none, while for others the reported values " + "are not necessarily reliable. (If in doubt, ask your " + "friendly neighborhood diffraction expert!) Even when " + "uncertainties are accurate, it can be " + "pragmatically useful to see how the results of " + "peak extraction change when assuming a different " + "value. Nevertheless, the primary determinant of " + "model complexity in SrMise is the uncertainty, so an " + "ad hoc uncertainty yields ad hoc model complexity. " + "See the SrMise documentation for further discussion, " + "including methods to mitigate this issue with " + "multimodel selection.", + ) + group.add_option( + "--dg-mode", + dest="dg_mode", + type="choice", + choices=["absolute", "data", "max-fraction", "ptp-fraction", "dG-fraction"], + help="Define how values passed to '--dg' are treated. " + "Possible values are: \n" + "'absolute' - The actual uncertainty in the PDF.\n" + "'max-fraction' - Fraction of max value in PDF.\n" + "'ptp-fraction' - Fraction of max minus min value " + "in the PDF.\n" + "'dG-fraction' - Fraction of dG reported by PDF.\n" + "If '--dg' is specified but mode is not, then mode " + "ia absolute. Otherwise, 'dG-fraction' is default " + "if the PDF reports uncertaintes, and 'max-fraction' " + "ia default if it does not.", + ) + group.add_option( + "--dg", + dest="dg", + type="float", + help="Perform extraction assuming uncertainty dg. " + "Defaults depend on --dg-mode as follows:\n" + "'absolute'=%s\n" + "'max-fraction'=%s\n" + "'ptp-fraction'=%s\n" + "'dG-fraction'=%s" + % ( + dg_defaults["absolute"], + dg_defaults["max-fraction"], + dg_defaults["ptp-fraction"], + dg_defaults["dG-fraction"], + ), + ) + # group.add_option("--multimodel", nargs=3, dest="multimodel", type="float", + # metavar="dg_min dg_max n", + # help="Generate n models from dg_min to dg_max (given by " + # "--dg-mode) and perform multimodel analysis. 
" + # "This overrides any value given for --dg") parser.add_option_group(group) - - group = OptionGroup(parser, "Saving and Plotting Options", - "") - group.add_option("--pwa", dest="pwafile", metavar="FILE", - help="Save summary of result to FILE (.pwa format).") - group.add_option("--save", dest="savefile", metavar="FILE", - help="Save result of extraction to FILE (.srmise " - "format).") - group.add_option("--plot", "-p", action="store_true", dest="plot", - help="Plot extracted peaks.") - group.add_option("--liveplot", "-l", action="store_true", dest="liveplot", - help="(Experimental) Plot extracted peaks when fitting.") - group.add_option("--wait", "-w", action="store_true", dest="wait", - help="(Experimental) When using liveplot wait for user " - "after plotting.") + group = OptionGroup(parser, "Saving and Plotting Options", "") + group.add_option( + "--pwa", + dest="pwafile", + metavar="FILE", + help="Save summary of result to FILE (.pwa format).", + ) + group.add_option( + "--save", + dest="savefile", + metavar="FILE", + help="Save result of extraction to FILE (.srmise " "format).", + ) + group.add_option( + "--plot", "-p", action="store_true", dest="plot", help="Plot extracted peaks." + ) + group.add_option( + "--liveplot", + "-l", + action="store_true", + dest="liveplot", + help="(Experimental) Plot extracted peaks when fitting.", + ) + group.add_option( + "--wait", + "-w", + action="store_true", + dest="wait", + help="(Experimental) When using liveplot wait for user " "after plotting.", + ) parser.add_option_group(group) - - group = OptionGroup(parser, "Verbosity Options", - "Control detail printed to console.") - group.add_option("--informative", "-i", action="store_const", const="info", - dest="verbosity", - help="Summary of progress.") - group.add_option("--quiet", "-q", action="store_const", const="warning", - dest="verbosity", - help="[Default] Show minimal summary.") - group.add_option("--silent", "-s", action="store_const", const="critical", - dest="verbosity", - help="No non-critical output.") - group.add_option("--verbose", "-v", action="store_const", const="debug", - dest="verbosity", - help="Show verbose output.") + group = OptionGroup( + parser, "Verbosity Options", "Control detail printed to console." 
+ ) + group.add_option( + "--informative", + "-i", + action="store_const", + const="info", + dest="verbosity", + help="Summary of progress.", + ) + group.add_option( + "--quiet", + "-q", + action="store_const", + const="warning", + dest="verbosity", + help="[Default] Show minimal summary.", + ) + group.add_option( + "--silent", + "-s", + action="store_const", + const="critical", + dest="verbosity", + help="No non-critical output.", + ) + group.add_option( + "--verbose", + "-v", + action="store_const", + const="debug", + dest="verbosity", + help="Show verbose output.", + ) parser.add_option_group(group) - group = OptionGroup(parser, "Deprecated Options", - "Not for general use.") - group.add_option("--scale", action="store_true", dest="scale", - help="(Deprecated) Scale supersampled uncertainties by " - "sqrt(oversampling) in intermediate steps when " - "Nyquist sampling.") - group.add_option("--no-scale", action="store_false", dest="scale", - help="(Deprecated) Never rescale uncertainties.") + group = OptionGroup(parser, "Deprecated Options", "Not for general use.") + group.add_option( + "--scale", + action="store_true", + dest="scale", + help="(Deprecated) Scale supersampled uncertainties by " + "sqrt(oversampling) in intermediate steps when " + "Nyquist sampling.", + ) + group.add_option( + "--no-scale", + action="store_false", + dest="scale", + help="(Deprecated) Never rescale uncertainties.", + ) parser.add_option_group(group) - (options, args) = parser.parse_args() if len(args) != 1: - parser.error("Exactly one argument required. \n"+usage) - + parser.error("Exactly one argument required. \n" + usage) from diffpy.srmise import srmiselog + srmiselog.setlevel(options.verbosity) from diffpy.srmise.pdfpeakextraction import PDFPeakExtraction @@ -275,30 +430,34 @@ def main(): if options.peakfunction is not None: from diffpy.srmise import peaks + try: - options.peakfunction = eval("peaks."+options.peakfunction) + options.peakfunction = eval("peaks." + options.peakfunction) except Exception as err: print(err) - print("Could not create peak function '%s'. Exiting." \ - %options.peakfunction) + print( + "Could not create peak function '%s'. Exiting." % options.peakfunction + ) return if options.modelevaluator is not None: from diffpy.srmise import modelevaluators + try: - options.modelevaluator = \ - eval("modelevaluators."+options.modelevaluator) + options.modelevaluator = eval("modelevaluators." + options.modelevaluator) except Exception as err: print(err) - print ("Could not find ModelEvaluator '%s'. Exiting." \ - %options.modelevaluator) + print( + "Could not find ModelEvaluator '%s'. Exiting." 
% options.modelevaluator + ) return if options.bcrystal is not None: from diffpy.srmise.baselines import Polynomial + bl = Polynomial(degree=1) - options.baseline = parsepars(bl, [options.bcrystal, '0c']) - options.baseline.pars[0] = -4*np.pi*options.baseline.pars[0] + options.baseline = parsepars(bl, [options.bcrystal, "0c"]) + options.baseline.pars[0] = -4 * np.pi * options.baseline.pars[0] elif options.bsrmise is not None: # use baseline from existing file blext = PDFPeakExtraction() @@ -306,31 +465,37 @@ def main(): options.baseline = blext.extracted.baseline elif options.bpoly0 is not None: from diffpy.srmise.baselines import Polynomial + bl = Polynomial(degree=0) options.baseline = parsepars(bl, [options.bpoly0]) elif options.bpoly1 is not None: from diffpy.srmise.baselines import Polynomial + bl = Polynomial(degree=1) options.baseline = parsepars(bl, options.bpoly1) elif options.bpoly2 is not None: from diffpy.srmise.baselines import Polynomial + bl = Polynomial(degree=2) options.baseline = parsepars(bl, options.bpoly2) elif options.bseq is not None: from diffpy.srmise.baselines import FromSequence + bl = FromSequence(options.bseq) options.baseline = bl.actualize([], "internal") elif options.bspherical is not None: from diffpy.srmise.baselines import NanoSpherical + bl = NanoSpherical() options.baseline = parsepars(bl, options.bspherical) elif options.baseline is not None: from diffpy.srmise import baselines + try: - options.baseline = eval("baselines."+options.baseline) + options.baseline = eval("baselines." + options.baseline) except Exception as err: print(err) - print("Could not create baseline '%s'. Exiting." %options.baseline) + print("Could not create baseline '%s'. Exiting." % options.baseline) return filename = args[0] @@ -359,17 +524,19 @@ def main(): if options.dg is None: options.dg = dg_defaults[options.dg_mode] if options.dg_mode == "absolute": - pdict["effective_dy"] = options.dg*np.ones(len(ext.x)) + pdict["effective_dy"] = options.dg * np.ones(len(ext.x)) elif options.dg_mode == "max-fraction": - pdict["effective_dy"] = options.dg*ext.y.max()*np.ones(len(ext.x)) + pdict["effective_dy"] = options.dg * ext.y.max() * np.ones(len(ext.x)) elif options.dg_mode == "ptp-fraction": - pdict["effective_dy"] = options.dg*ext.y.ptp()*np.ones(len(ext.y)) + pdict["effective_dy"] = options.dg * ext.y.ptp() * np.ones(len(ext.y)) elif options.dg_mode == "dG-fraction": - pdict["effective_dy"] = options.dg*ext.dy + pdict["effective_dy"] = options.dg * ext.dy if options.rng is not None: pdict["rng"] = list(options.rng) if options.qmax is not None: - pdict["qmax"] = options.qmax if options.qmax == "automatic" else float(options.qmax) + pdict["qmax"] = ( + options.qmax if options.qmax == "automatic" else float(options.qmax) + ) if options.nyquist is not None: pdict["nyquist"] = options.nyquist if options.supersample is not None: @@ -381,6 +548,7 @@ def main(): if options.liveplot: from diffpy.srmise import srmiselog + srmiselog.liveplotting(True, options.wait) ext.setvars(**pdict) @@ -394,16 +562,14 @@ def main(): ext.write(options.savefile) except SrMiseFileError as err: print(err) - print("Could not save result to '%s'." %options.savefile) - + print("Could not save result to '%s'." % options.savefile) if options.pwafile is not None: try: ext.writepwa(options.pwafile) except SrMiseFileError as err: print(err) - print("Could not save pwa summary to '%s'." %options.pwafile) - + print("Could not save pwa summary to '%s'." 
% options.pwafile) print(ext) if cov is not None: @@ -411,11 +577,13 @@ def main(): if options.plot: from diffpy.srmise.applications.plot import makeplot + makeplot(ext) plt.show() elif options.liveplot: plt.show() + def parsepars(mp, parseq): """Return actualized model from sequence of strings. @@ -430,7 +598,7 @@ def parsepars(mp, parseq): pars = [] free = [] for p in parseq: - if p[-1] == 'c': + if p[-1] == "c": pars.append(float(p[0:-1])) free.append(False) else: @@ -448,60 +616,63 @@ def parsepars(mp, parseq): class IndentedHelpFormatterWithNL(IndentedHelpFormatter): - def _format_text(self, text): - if not text: return "" - text_width = self.width - self.current_indent - indent = " "*self.current_indent -# the above is still the same - bits = text.split('\n') - formatted_bits = [ - textwrap.fill(bit, - text_width, - initial_indent=indent, - subsequent_indent=indent) - for bit in bits] - result = "\n".join(formatted_bits) + "\n" - return result - - def format_option(self, option): - # The help for each option consists of two parts: - # * the opt strings and metavars - # eg. ("-x", or "-fFILENAME, --file=FILENAME") - # * the user-supplied help string - # eg. ("turn on expert mode", "read data from FILENAME") - # - # If possible, we write both of these on the same line: - # -x turn on expert mode - # - # But if the opt string list is too long, we put the help - # string on a second line, indented to the same column it would - # start in if it fit on the first line. - # -fFILENAME, --file=FILENAME - # read data from FILENAME - result = [] - opts = self.option_strings[option] - opt_width = self.help_position - self.current_indent - 2 - if len(opts) > opt_width: - opts = "%*s%s\n" % (self.current_indent, "", opts) - indent_first = self.help_position - else: # start help on same line as opts - opts = "%*s%-*s " % (self.current_indent, "", opt_width, opts) - indent_first = 0 - result.append(opts) - if option.help: - help_text = self.expand_default(option) -# Everything is the same up through here - help_lines = [] - for para in help_text.split("\n"): - help_lines.extend(textwrap.wrap(para, self.help_width)) -# Everything is the same after here - result.append("%*s%s\n" % ( - indent_first, "", help_lines[0])) - result.extend(["%*s%s\n" % (self.help_position, "", line) - for line in help_lines[1:]]) - elif opts[-1] != "\n": - result.append("\n") - return "".join(result) + def _format_text(self, text): + if not text: + return "" + text_width = self.width - self.current_indent + indent = " " * self.current_indent + # the above is still the same + bits = text.split("\n") + formatted_bits = [ + textwrap.fill( + bit, text_width, initial_indent=indent, subsequent_indent=indent + ) + for bit in bits + ] + result = "\n".join(formatted_bits) + "\n" + return result + + def format_option(self, option): + # The help for each option consists of two parts: + # * the opt strings and metavars + # eg. ("-x", or "-fFILENAME, --file=FILENAME") + # * the user-supplied help string + # eg. ("turn on expert mode", "read data from FILENAME") + # + # If possible, we write both of these on the same line: + # -x turn on expert mode + # + # But if the opt string list is too long, we put the help + # string on a second line, indented to the same column it would + # start in if it fit on the first line. 
+ # -fFILENAME, --file=FILENAME + # read data from FILENAME + result = [] + opts = self.option_strings[option] + opt_width = self.help_position - self.current_indent - 2 + if len(opts) > opt_width: + opts = "%*s%s\n" % (self.current_indent, "", opts) + indent_first = self.help_position + else: # start help on same line as opts + opts = "%*s%-*s " % (self.current_indent, "", opt_width, opts) + indent_first = 0 + result.append(opts) + if option.help: + help_text = self.expand_default(option) + # Everything is the same up through here + help_lines = [] + for para in help_text.split("\n"): + help_lines.extend(textwrap.wrap(para, self.help_width)) + # Everything is the same after here + result.append("%*s%s\n" % (indent_first, "", help_lines[0])) + result.extend( + ["%*s%s\n" % (self.help_position, "", line) for line in help_lines[1:]] + ) + elif opts[-1] != "\n": + result.append("\n") + return "".join(result) + + ### End class if __name__ == "__main__": diff --git a/diffpy/srmise/applications/plot.py b/diffpy/srmise/applications/plot.py index a5d16a1..b6b788a 100755 --- a/diffpy/srmise/applications/plot.py +++ b/diffpy/srmise/applications/plot.py @@ -16,9 +16,9 @@ import sys import matplotlib.pyplot as plt -from matplotlib.pyplot import MultipleLocator import mpl_toolkits.axisartist as AA import numpy as np +from matplotlib.pyplot import MultipleLocator from mpl_toolkits.axes_grid1 import make_axes_locatable from mpl_toolkits.axes_grid1.inset_locator import inset_axes @@ -28,49 +28,63 @@ # For a given figure, returns a label of interest labeldict = {} -default_gobs_style = {'color' : 'b', 'linestyle' : '', - 'markeredgecolor' : 'b', 'marker' : 'o', - 'markerfacecolor' : 'none', 'markersize' : 4} - -default_gfit_style = {'color' : 'g'} -default_gind_style = {'facecolor' : 'green', 'alpha' : 0.2} +default_gobs_style = { + "color": "b", + "linestyle": "", + "markeredgecolor": "b", + "marker": "o", + "markerfacecolor": "none", + "markersize": 4, +} + +default_gfit_style = {"color": "g"} +default_gind_style = {"facecolor": "green", "alpha": 0.2} default_gres_style = {} default_ep_style = {} default_ip_style = {} -default_dg_style = {'linestyle' : 'none', 'color' : 'black', - 'marker' : 'o', 'markerfacecolor' : 'black', - 'markeredgecolor' : 'black', - 'markersize' : 1, 'antialiased': False} +default_dg_style = { + "linestyle": "none", + "color": "black", + "marker": "o", + "markerfacecolor": "black", + "markeredgecolor": "black", + "markersize": 1, + "antialiased": False, +} def setfigformat(figsize): from matplotlib import rc - rc('legend', numpoints=2) - rc('figure', figsize=figsize) - rc('axes', titlesize=12, labelsize=11) - rc('xtick', labelsize=10) - rc('ytick', labelsize=10) - rc('lines', linewidth=0.75, markeredgewidth=0.5) + + rc("legend", numpoints=2) + rc("figure", figsize=figsize) + rc("axes", titlesize=12, labelsize=11) + rc("xtick", labelsize=10) + rc("ytick", labelsize=10) + rc("lines", linewidth=0.75, markeredgewidth=0.5) return + def gr_formataxis(ax=None): - if ax is None: ax = plt.gca() + if ax is None: + ax = plt.gca() ax.xaxis.set_minor_locator(MultipleLocator(1)) - ax.yaxis.set_label_position('left') + ax.yaxis.set_label_position("left") ax.yaxis.tick_left() - ax.yaxis.set_ticks_position('both') + ax.yaxis.set_ticks_position("both") return + def comparepositions(ppe, ip=None, **kwds): ax = kwds.get("ax", plt.gca()) - base = kwds.get("base", 0.) - yideal = kwds.get("yideal", -1.) - yext = kwds.get("yext", 1.) 
+ base = kwds.get("base", 0.0) + yideal = kwds.get("yideal", -1.0) + yext = kwds.get("yext", 1.0) ep_style = kwds.get("ep_style", default_ep_style) ip_style = kwds.get("ip_style", default_ip_style) - yideal_label = kwds.get("yideal_label", r'ideal') - yext_label = kwds.get("yext_label", r'found') + yideal_label = kwds.get("yideal_label", r"ideal") + yext_label = kwds.get("yext_label", r"found") pmin = kwds.get("pmin", -np.inf) pmax = kwds.get("pmax", np.inf) @@ -78,39 +92,39 @@ def comparepositions(ppe, ip=None, **kwds): ep = [p for p in ep if p >= pmin and p <= pmax] if ip is not None: - xi = np.NaN + np.zeros(3*len(ip)) + xi = np.NaN + np.zeros(3 * len(ip)) xi[0::3] = ip xi[1::3] = ip yi = np.zeros_like(xi) + base yi[1::3] += yideal - plt.plot(xi, yi, 'b', lw=1.5, **ip_style) + plt.plot(xi, yi, "b", lw=1.5, **ip_style) - xe = np.NaN + np.zeros(3*len(ep)) + xe = np.NaN + np.zeros(3 * len(ep)) xe[0::3] = ep xe[1::3] = ep ye = np.zeros_like(xe) + base ye[1::3] += yext - plt.plot(xe, ye, 'g', lw=1.5, **ep_style) + plt.plot(xe, ye, "g", lw=1.5, **ep_style) if ip is not None: yb = (base, base) - plt.axhline(base, linestyle=":", color="k" ) - ax.yaxis.set_ticks([base+.5*yideal, base+.5*yext]) + plt.axhline(base, linestyle=":", color="k") + ax.yaxis.set_ticks([base + 0.5 * yideal, base + 0.5 * yext]) ax.yaxis.set_ticklabels([yideal_label, yext_label]) else: - ax.yaxis.set_ticks([base+.5*yext]) + ax.yaxis.set_ticks([base + 0.5 * yext]) ax.yaxis.set_ticklabels([yext_label]) # Set ylim explicitly, for case where yext is empty. if ip is not None: - plt.ylim(base+yideal, base+yext) + plt.ylim(base + yideal, base + yext) else: - plt.ylim(base, base+yext) + plt.ylim(base, base + yext) for tick in ax.yaxis.get_major_ticks(): tick.tick1line.set_markersize(0) tick.tick2line.set_markersize(0) - tick.label1.set_verticalalignment('center') + tick.label1.set_verticalalignment("center") tick.label1.set_fontsize(8) ticks = ax.yaxis.get_major_ticks() ticks[-1].label1.set_color("green") @@ -118,37 +132,45 @@ def comparepositions(ppe, ip=None, **kwds): ticks[0].label1.set_color("blue") return + def dgseries(stability, **kwds): ax = kwds.get("ax", plt.gca()) dg_style = kwds.get("dg_style", default_dg_style) - scale = kwds.get("scale", 1.) + scale = kwds.get("scale", 1.0) - dgmin = kwds.get("dgmin", stability.results[0][0])*scale - dgmax = kwds.get("dgmax", stability.results[-1][0])*scale + dgmin = kwds.get("dgmin", stability.results[0][0]) * scale + dgmax = kwds.get("dgmax", stability.results[-1][0]) * scale - pmin = kwds.get("pmin", 0.) 
+ pmin = kwds.get("pmin", 0.0) pmax = kwds.get("pmax", np.inf) x = [] y = [] for dg, peaks, bl, dr in stability.results: - if dg*scale < dgmin or dg*scale > dgmax: + if dg * scale < dgmin or dg * scale > dgmax: continue peakpos = [p["position"] for p in peaks] peakpos = [p for p in peakpos if p >= pmin and p <= pmax] x.extend(peakpos) - y.extend(np.zeros_like(peakpos) + dg*scale) + y.extend(np.zeros_like(peakpos) + dg * scale) plt.plot(x, y, **dg_style) + def labelallsubplots(): rv = [] - for i, c in enumerate('abcd'): + for i, c in enumerate("abcd"): plt.subplot(221 + i) s = "(%s)" % c - ht = plt.text(0.04, 0.95, s, - horizontalalignment='left', verticalalignment='top', - transform=plt.gca().transAxes, weight='bold') + ht = plt.text( + 0.04, + 0.95, + s, + horizontalalignment="left", + verticalalignment="top", + transform=plt.gca().transAxes, + weight="bold", + ) rv.append(ht) return rv @@ -165,14 +187,16 @@ def makeplot(ppe_or_stability, ip=None, **kwds): if ppe.extracted is None: # Makeplot requires a ModelCluster, so whip one up. from diffpy.srmise import ModelCluster - ppe.defaultvars() # Make sure everything has some setting. This - # shouldn't have harmful side effects. + + ppe.defaultvars() # Make sure everything has some setting. This + # shouldn't have harmful side effects. rangeslice = ppe.getrangeslice() x = ppe.x[rangeslice] y = ppe.y[rangeslice] dy = ppe.effective_dy[rangeslice] - mcluster = ModelCluster(ppe.initial_peaks, ppe.baseline, x, y, \ - dy, None, ppe.error_method, ppe.pf) + mcluster = ModelCluster( + ppe.initial_peaks, ppe.baseline, x, y, dy, None, ppe.error_method, ppe.pf + ) ext = mcluster else: ext = ppe.extracted @@ -190,14 +214,14 @@ def makeplot(ppe_or_stability, ip=None, **kwds): # Define heights and interstitial offsets # All values in percent of main axis. - top_offset = kwds.get("top_offset", 0.) - dg_height = kwds.get("dg_height", 15. if stability is not None else 0.) - cmp_height = kwds.get("cmp_height", 15. if ip is not None else 7.5) - datatop_offset = kwds.get("datatop_offset", 3.) + top_offset = kwds.get("top_offset", 0.0) + dg_height = kwds.get("dg_height", 15.0 if stability is not None else 0.0) + cmp_height = kwds.get("cmp_height", 15.0 if ip is not None else 7.5) + datatop_offset = kwds.get("datatop_offset", 3.0) # <- Data appears here -> - databottom_offset = kwds.get("databottom_offset", 3.) + databottom_offset = kwds.get("databottom_offset", 3.0) # <- Residual appears here -> - bottom_offset = kwds.get("bottom_offset", 3.) 
+ bottom_offset = kwds.get("bottom_offset", 3.0) # Style options dg_style = kwds.get("dg_style", default_dg_style) @@ -209,83 +233,94 @@ def makeplot(ppe_or_stability, ip=None, **kwds): ip_style = kwds.get("ip_style", default_ip_style) # Label options - userxlabel = kwds.get("xlabel", r'r ($\mathrm{\AA}$)') - userylabel = kwds.get("ylabel", r'G ($\mathrm{\AA^{-2}}$)') - datalabelx = kwds.get("datalabelx", .04) - yideal_label = kwds.get("yideal_label", r'ideal') - yext_label = kwds.get("yext_label", r'found') + userxlabel = kwds.get("xlabel", r"r ($\mathrm{\AA}$)") + userylabel = kwds.get("ylabel", r"G ($\mathrm{\AA^{-2}}$)") + datalabelx = kwds.get("datalabelx", 0.04) + yideal_label = kwds.get("yideal_label", r"ideal") + yext_label = kwds.get("yext_label", r"found") # Other options datalabel = kwds.get("datalabel", None) - dgformatstr = kwds.get("dgformatstr", r'$\delta$g=%f') - dgformatpost = kwds.get("dgformatpost", None) #->userfunction(string) + dgformatstr = kwds.get("dgformatstr", r"$\delta$g=%f") + dgformatpost = kwds.get("dgformatpost", None) # ->userfunction(string) show_fit = kwds.get("show_fit", True) show_individual = kwds.get("show_individual", True) fill_individual = kwds.get("fill_individual", True) show_observed = kwds.get("show_observed", True) show_residual = kwds.get("show_residual", True) - mask_residual = kwds.get("mask_residual", False) #-> number + mask_residual = kwds.get("mask_residual", False) # -> number show_annotation = kwds.get("show_annotation", True) - scale = kwds.get("scale", 1.) # Apply a global scaling factor to the data - - + scale = kwds.get("scale", 1.0) # Apply a global scaling factor to the data # Define the various data which will be plotted r = ext.r_cluster - dr = (r[-1]-r[0])/len(r) - rexpand = np.concatenate((np.arange(r[0]-dr, xlo, -dr)[::-1], r, np.arange(r[-1]+dr, xhi+dr, dr))) - rfine = np.arange(r[0], r[-1], .1*dr) - gr_obs = np.array(resample(ppe.x, ppe.y, rexpand))*scale - #gr_fit = resample(r, ext.value(), rfine) - gr_fit = np.array(ext.value(rfine))*scale - gr_fit_baseline = np.array(ext.valuebl(rfine))*scale - gr_fit_ind = [gr_fit_baseline + np.array(p.value(rfine))*scale for p in ext.model] - gr_res = np.array(ext.residual())*scale + dr = (r[-1] - r[0]) / len(r) + rexpand = np.concatenate( + (np.arange(r[0] - dr, xlo, -dr)[::-1], r, np.arange(r[-1] + dr, xhi + dr, dr)) + ) + rfine = np.arange(r[0], r[-1], 0.1 * dr) + gr_obs = np.array(resample(ppe.x, ppe.y, rexpand)) * scale + # gr_fit = resample(r, ext.value(), rfine) + gr_fit = np.array(ext.value(rfine)) * scale + gr_fit_baseline = np.array(ext.valuebl(rfine)) * scale + gr_fit_ind = [gr_fit_baseline + np.array(p.value(rfine)) * scale for p in ext.model] + gr_res = np.array(ext.residual()) * scale if mask_residual: gr_res = np.ma.masked_outside(gr_res, -mask_residual, mask_residual) all_gr = [] - if show_fit: all_gr.append(gr_fit) - #if show_individual: all_gr.extend([gr_fit_baseline, gr_fit_ind]) + if show_fit: + all_gr.append(gr_fit) + # if show_individual: all_gr.extend([gr_fit_baseline, gr_fit_ind]) if show_individual: all_gr.append(gr_fit_baseline) if len(gr_fit_ind) > 0: all_gr.extend(gr_fit_ind) - if show_observed: all_gr.append(gr_obs) + if show_observed: + all_gr.append(gr_obs) # gr_fit_ind is a list of lists, so use np.min/max # The funky bit with scale makes sure that a user-specified value # has scale applied to it, without messing up the default values, # which are calculated from already scaled quantities. 
- min_gr = kwds.get("min_gr", np.min([np.min(gr) for gr in all_gr])/scale)*scale - max_gr = kwds.get("max_gr", np.max([np.max(gr) for gr in all_gr])/scale)*scale - + min_gr = kwds.get("min_gr", np.min([np.min(gr) for gr in all_gr]) / scale) * scale + max_gr = kwds.get("max_gr", np.max([np.max(gr) for gr in all_gr]) / scale) * scale if show_residual: min_res = np.min(gr_res) max_res = np.max(gr_res) else: - min_res = 0. - max_res = 0. + min_res = 0.0 + max_res = 0.0 # Derive various y limits based on all the offsets - rel_height = 100. - top_offset - dg_height - cmp_height - datatop_offset - databottom_offset - bottom_offset - abs_height = 100*((max_gr - min_gr) + (max_res - min_res))/rel_height - - yhi = max_gr + (top_offset + dg_height + cmp_height + datatop_offset)*abs_height/100 + rel_height = ( + 100.0 + - top_offset + - dg_height + - cmp_height + - datatop_offset + - databottom_offset + - bottom_offset + ) + abs_height = 100 * ((max_gr - min_gr) + (max_res - min_res)) / rel_height + + yhi = ( + max_gr + + (top_offset + dg_height + cmp_height + datatop_offset) * abs_height / 100 + ) ylo = yhi - abs_height yhi = kwds.get("yhi", yhi) ylo = kwds.get("ylo", ylo) - datatop = yhi - (yhi-ylo)*.01*(top_offset + dg_height + cmp_height) - datalabeltop = 1 - .01*(top_offset + dg_height + cmp_height + datatop_offset) - resbase = ylo + bottom_offset*abs_height/100 - min_res + datatop = yhi - (yhi - ylo) * 0.01 * (top_offset + dg_height + cmp_height) + datalabeltop = 1 - 0.01 * (top_offset + dg_height + cmp_height + datatop_offset) + resbase = ylo + bottom_offset * abs_height / 100 - min_res resbase = kwds.get("resbase", resbase) - fig = kwds.get("figure", plt.gcf()) fig.clf() ax_data = AA.Subplot(fig, 111) @@ -302,8 +337,8 @@ def makeplot(ppe_or_stability, ip=None, **kwds): for peak in gr_fit_ind: plt.fill_between(rfine, gr_fit_baseline, peak, **gind_style) if show_residual: - plt.plot(r, gr_res + resbase, 'r-', **gres_style) - plt.plot((xlo, xhi), 2*[resbase], 'k:') + plt.plot(r, gr_res + resbase, "r-", **gres_style) + plt.plot((xlo, xhi), 2 * [resbase], "k:") # Format ax_data plt.xlim(xlo, xhi) @@ -311,27 +346,41 @@ def makeplot(ppe_or_stability, ip=None, **kwds): plt.xlabel(userxlabel) plt.ylabel(userylabel) ax_data.xaxis.set_minor_locator(plt.MultipleLocator(1)) - #ax_data.yaxis.set_minor_locator(plt.MultipleLocator(np.max([1,int((yhi-ylo)/20)]))) - ax_data.yaxis.set_label_position('left') + # ax_data.yaxis.set_minor_locator(plt.MultipleLocator(np.max([1,int((yhi-ylo)/20)]))) + ax_data.yaxis.set_label_position("left") ax_data.yaxis.tick_left() - ax_data.yaxis.set_ticks_position('both') + ax_data.yaxis.set_ticks_position("both") # Remove labels above where insets begin - #ax_data.yaxis.set_ticklabels([str(int(loc)) for loc in ax_data.yaxis.get_majorticklocs() if loc < datatop]) - ax_data.yaxis.set_ticks([loc for loc in ax_data.yaxis.get_majorticklocs() if (loc < datatop and loc >= ylo)]) - + # ax_data.yaxis.set_ticklabels([str(int(loc)) for loc in ax_data.yaxis.get_majorticklocs() if loc < datatop]) + ax_data.yaxis.set_ticks( + [ + loc + for loc in ax_data.yaxis.get_majorticklocs() + if (loc < datatop and loc >= ylo) + ] + ) # Dataset label if datalabel is not None: - dl = plt.text(datalabelx, datalabeltop, datalabel, ha='left', va='top', - transform=ax_data.transAxes, weight='bold') + dl = plt.text( + datalabelx, + datalabeltop, + datalabel, + ha="left", + va="top", + transform=ax_data.transAxes, + weight="bold", + ) else: dl = None figdict["datalabel"] = dl # Create new x axis at bottom edge 
of compare inset ax_data.axis["top"].set_visible(False) - ax_data.axis["newtop"] = ax_data.new_floating_axis(0, datatop, axis_direction="bottom") # "top" bugged? + ax_data.axis["newtop"] = ax_data.new_floating_axis( + 0, datatop, axis_direction="bottom" + ) # "top" bugged? ax_data.axis["newtop"].toggle(all=False, ticks=True) ax_data.axis["newtop"].major_ticks.set_tick_out(True) ax_data.axis["newtop"].minor_ticks.set_tick_out(True) @@ -340,37 +389,57 @@ def makeplot(ppe_or_stability, ip=None, **kwds): # The original label is invisible, but we use its (dynamic) x position # to update the new label, which we define have the correct y position. # A bit of a tradeoff for the nice insets and ability to define new axes. - newylabel = plt.text(-.1, .5*(datatop-ylo)/(yhi-ylo), userylabel, - ha='center', va='center', rotation='vertical', transform=ax_data.transAxes) - labeldict[fig] = newylabel # so we can find the correct text object - fig.canvas.mpl_connect('draw_event', on_draw) # original label invisibility and updating + newylabel = plt.text( + -0.1, + 0.5 * (datatop - ylo) / (yhi - ylo), + userylabel, + ha="center", + va="center", + rotation="vertical", + transform=ax_data.transAxes, + ) + labeldict[fig] = newylabel # so we can find the correct text object + fig.canvas.mpl_connect( + "draw_event", on_draw + ) # original label invisibility and updating # Compare extracted (and ideal, if provided) peak positions clearly. if cmp_height > 0: - ax_cmp = inset_axes(ax_data, - width="100%", - height="%s%%" %cmp_height, - loc=2, - bbox_to_anchor=(0., -.01*(top_offset+dg_height), 1, 1), - bbox_transform=ax_data.transAxes, - borderpad=0) + ax_cmp = inset_axes( + ax_data, + width="100%", + height="%s%%" % cmp_height, + loc=2, + bbox_to_anchor=(0.0, -0.01 * (top_offset + dg_height), 1, 1), + bbox_transform=ax_data.transAxes, + borderpad=0, + ) figdict["cmp"] = ax_cmp plt.axes(ax_cmp) - comparepositions(ext, ip, ep_style=ep_style, ip_style=ip_style, yideal_label=yideal_label, yext_label=yext_label) + comparepositions( + ext, + ip, + ep_style=ep_style, + ip_style=ip_style, + yideal_label=yideal_label, + yext_label=yext_label, + ) plt.xlim(xlo, xhi) ax_cmp.set_xticks([]) # Show how extracted peak positions change as dg is changed if dg_height > 0: - ax_dg = inset_axes(ax_data, - width="100%", - height="%s%%" %dg_height, - loc=2, - bbox_to_anchor=(0, -.01*top_offset, 1, 1), - bbox_transform=ax_data.transAxes, - borderpad=0) + ax_dg = inset_axes( + ax_data, + width="100%", + height="%s%%" % dg_height, + loc=2, + bbox_to_anchor=(0, -0.01 * top_offset, 1, 1), + bbox_transform=ax_data.transAxes, + borderpad=0, + ) figdict["dg"] = ax_dg plt.axes(ax_dg) @@ -385,31 +454,42 @@ def makeplot(ppe_or_stability, ip=None, **kwds): plt.xlim(xlo, xhi) ax_dg.xaxis.set_major_locator(plt.NullLocator()) ax_dg.yaxis.set_major_locator(plt.MaxNLocator(3)) - plt.ylabel(r'$\delta$g') + plt.ylabel(r"$\delta$g") # Annotate the actual dg shown if show_annotation: - dg = np.mean(ext.error_cluster)*scale - dgstr = dgformatstr %dg - if "dgformatpost" in kwds: #post-processing on dg annotation + dg = np.mean(ext.error_cluster) * scale + dgstr = dgformatstr % dg + if "dgformatpost" in kwds: # post-processing on dg annotation dgstr = kwds["dgformatpost"](dgstr) if len(ext.model) > 0: - xpos = np.mean([xlo, ext.model[0]["position"]]) # OK for now. + xpos = np.mean([xlo, ext.model[0]["position"]]) # OK for now. 
else: - xpos = xlo + .1*(xhi-xlo) + xpos = xlo + 0.1 * (xhi - xlo) if dg_height > 0 and cmp_height > 0: # Arrow, text in compare distances line ylo2, yhi2 = ax_dg.get_ylim() if ip is not None: - ypos = ylo2 - .25*cmp_height/dg_height*(yhi2-ylo2) + ypos = ylo2 - 0.25 * cmp_height / dg_height * (yhi2 - ylo2) else: - ypos = ylo2 - .5*cmp_height/dg_height*(yhi2-ylo2) - plt.annotate(dgstr, xy=(xlo, dg), xycoords='data', va='center', ha='center', - xytext=(xpos,ypos), textcoords='data', size=8, color='green', - arrowprops=dict(arrowstyle="->", - connectionstyle="angle,angleA=90,angleB=0,rad=10", - color="green")) + ypos = ylo2 - 0.5 * cmp_height / dg_height * (yhi2 - ylo2) + plt.annotate( + dgstr, + xy=(xlo, dg), + xycoords="data", + va="center", + ha="center", + xytext=(xpos, ypos), + textcoords="data", + size=8, + color="green", + arrowprops=dict( + arrowstyle="->", + connectionstyle="angle,angleA=90,angleB=0,rad=10", + color="green", + ), + ) elif dg_height > 0: # Arrow, and text located somewhere in main plot region @@ -420,8 +500,8 @@ def makeplot(ppe_or_stability, ip=None, **kwds): # Must change axes plt.axes(ax_cmp) ylo2, yhi2 = ax_cmp.get_ylim() - ypos = yhi2/2. - plt.text(xpos, ypos, dgstr, va='center', ha='center', size=8, color='green') + ypos = yhi2 / 2.0 + plt.text(xpos, ypos, dgstr, va="center", ha="center", size=8, color="green") else: # Text only in main plot region # Must change axes @@ -440,6 +520,8 @@ def makeplot(ppe_or_stability, ip=None, **kwds): # invisiblelabel must be temporarily made visible to update # its values. _lastxpos = {} + + def on_draw(event): global _lastxpos fig = event.canvas.figure @@ -459,7 +541,7 @@ def on_draw(event): # If it is kept visible the whole time this problem doesn't occur. # This problem doesn't occur onscreen (TkAgg) or printing PDFs, and # didn't occur in matplotlib 1.0.0. - if abs(xpos - _lastxpos.get(fig, 0)) > .001: + if abs(xpos - _lastxpos.get(fig, 0)) > 0.001: _lastxpos[fig] = xpos plt.draw() else: @@ -467,25 +549,27 @@ def on_draw(event): invisiblelabel.set_visible(False) xpos_old = visiblelabel.get_position()[0] - if abs(xpos - xpos_old) > .001: + if abs(xpos - xpos_old) > 0.001: labeldict[fig].set_x(xpos) plt.draw() return + def readcompare(filename): """Returns a list of distances read from filename, otherwise None.""" from diffpy.srmise.srmiseerrors import SrMiseDataFormatError, SrMiseFileError # TODO: Make this safer try: - datastring = open(filename,'rb').read() + datastring = open(filename, "rb").read() except Exception as err: raise err import re - res = re.search(r'^[^#]', datastring, re.M) + + res = re.search(r"^[^#]", datastring, re.M) if res: - datastring = datastring[res.end():].strip() + datastring = datastring[res.end() :].strip() distances = [] @@ -493,7 +577,7 @@ def readcompare(filename): for line in datastring.split("\n"): distances.append(float(line)) except (ValueError, IndexError) as err: - print("Could not read distances from '%s'. Ignoring file." %filename) + print("Could not read distances from '%s'. Ignoring file." % filename) if len(distances) == 0: return None @@ -503,31 +587,43 @@ def readcompare(filename): def main(): # configure options parsing - usage = ("%prog srmise_file [options]\n" - "srmise_file can be an extraction file saved by SrMise, " - "or a data file saved by PeakStability.") - descr = ("A very basic tool for somewhat prettier plotting than provided by " - "the basic SrMise classes. 
Can be used to compare peak positions " - "with those from a list.\n" - "NOTE: At this time the utility only works with peaks extracted using diffpy.srmise.PDFPeakExtraction.") + usage = ( + "%prog srmise_file [options]\n" + "srmise_file can be an extraction file saved by SrMise, " + "or a data file saved by PeakStability." + ) + descr = ( + "A very basic tool for somewhat prettier plotting than provided by " + "the basic SrMise classes. Can be used to compare peak positions " + "with those from a list.\n" + "NOTE: At this time the utility only works with peaks extracted using diffpy.srmise.PDFPeakExtraction." + ) parser = optparse.OptionParser(usage=usage, description=descr) - parser.add_option("--compare", type="string", - help="Compare extracted distances to distances listed (1/line) in this file.") - parser.add_option("--model", type="int", - help="Plot given model from set. Ignored if srmise_file is not a PeakStability file.") - parser.add_option("--show", action="store_true", - help="execute pylab.show() blocking call") - parser.add_option("-o", "--output", type="string", - help="save plot to the specified file") - parser.add_option("--format", type="string", default="eps", - help="output format for plot saving") + parser.add_option( + "--compare", + type="string", + help="Compare extracted distances to distances listed (1/line) in this file.", + ) + parser.add_option( + "--model", + type="int", + help="Plot given model from set. Ignored if srmise_file is not a PeakStability file.", + ) + parser.add_option( + "--show", action="store_true", help="execute pylab.show() blocking call" + ) + parser.add_option( + "-o", "--output", type="string", help="save plot to the specified file" + ) + parser.add_option( + "--format", type="string", default="eps", help="output format for plot saving" + ) parser.allow_interspersed_args = True opts, args = parser.parse_args(sys.argv[1:]) - if len(args) != 1: - parser.error("Exactly one argument required. \n"+usage) + parser.error("Exactly one argument required. \n" + usage) filename = args[0] @@ -535,26 +631,28 @@ def main(): toplot = PDFPeakExtraction() try: toplot.read(filename) - except (Exception): + except Exception: toplot = PeakStability() try: toplot.load(filename) except Exception: - print("File '%s' is not a .srmise or PeakStability data file." %filename) + print( + "File '%s' is not a .srmise or PeakStability data file." % filename + ) return if opts.model is not None: try: toplot.setcurrent(opts.model) - except (Exception): - print("Ignoring model, %s is not a PeakStability file." %filename) + except Exception: + print("Ignoring model, %s is not a PeakStability file." 
% filename) distances = None if opts.compare is not None: # use baseline from existing file distances = readcompare(opts.compare) - setfigformat(figsize=(6., 4.0)) + setfigformat(figsize=(6.0, 4.0)) figdict = makeplot(toplot, distances) if opts.output: plt.savefig(opts.output, format=opts.format, dpi=600) @@ -565,5 +663,5 @@ def main(): return -if __name__ == '__main__': +if __name__ == "__main__": main() diff --git a/diffpy/srmise/basefunction.py b/diffpy/srmise/basefunction.py index 45d8d6f..3d23670 100644 --- a/diffpy/srmise/basefunction.py +++ b/diffpy/srmise/basefunction.py @@ -15,14 +15,16 @@ import logging import re import sys -from fontTools.unicode import Unicode + import numpy as np +from fontTools.unicode import Unicode from diffpy.srmise.modelparts import ModelPart, ModelParts from diffpy.srmise.srmiseerrors import * logger = logging.getLogger("diffpy.srmise") + class BaseFunction(object): """Base class for mathematical functions which model numeric sequences. @@ -61,7 +63,15 @@ class BaseFunction(object): transform_parameters() """ - def __init__(self, parameterdict, parformats, default_formats, metadict, base=None, Cache=None): + def __init__( + self, + parameterdict, + parformats, + default_formats, + metadict, + base=None, + Cache=None, + ): """Set parameterdict defined by subclass Parameters @@ -96,23 +106,31 @@ def __init__(self, parameterdict, parformats, default_formats, metadict, base=No vals = self.parameterdict.values() vals.sort() if vals != range(self.npars): - emsg = "Argument parameterdict's values must uniquely specify "+\ - "the index of each parameter defined by its keys." + emsg = ( + "Argument parameterdict's values must uniquely specify " + + "the index of each parameter defined by its keys." + ) raise ValueError(emsg) self.parformats = parformats # Check validity of default_formats self.default_formats = default_formats - if not ("default_input" in self.default_formats and - "default_output" in self.default_formats): - emsg = "Argument default_formats must specify 'default_input' "+\ - "and 'default_output' as keys." + if not ( + "default_input" in self.default_formats + and "default_output" in self.default_formats + ): + emsg = ( + "Argument default_formats must specify 'default_input' " + + "and 'default_output' as keys." + ) raise ValueError(emsg) for f in self.default_formats.values(): if not f in self.parformats: - emsg = "Keys of argument default_formats must map to a "+\ - "value within argument parformats." + emsg = ( + "Keys of argument default_formats must map to a " + + "value within argument parformats." + ) raise ValueError() # Set metadictionary @@ -126,12 +144,11 @@ def __init__(self, parameterdict, parformats, default_formats, metadict, base=No # of PeakFunction. # Object to cache: (basefunctioninstance, tuple of parameters) if Cache is not None: - #self.value = Cache(self.value, "value") - #self.jacobian = Cache(self.jacobian, "jacobian") + # self.value = Cache(self.value, "value") + # self.jacobian = Cache(self.jacobian, "jacobian") pass return - #### "Virtual" class methods #### def actualize(self, *args, **kwds): @@ -164,7 +181,6 @@ def _valueraw(self, *args, **kwds): emsg = "_valueraw must() be implemented in a BaseFunction subclass." raise NotImplementedError(emsg) - #### Class methods #### def jacobian(self, p, r, rng=None): @@ -179,8 +195,10 @@ def jacobian(self, p, r, rng=None): previously calculated values instead. 
""" if self is not p._owner: - emsg = "Argument 'p' must be evaluated by the BaseFunction "+\ - "subclass which owns it." + emsg = ( + "Argument 'p' must be evaluated by the BaseFunction " + + "subclass which owns it." + ) raise ValueError(emsg) # normally r will be a sequence, but also allow single numeric values @@ -192,7 +210,7 @@ def jacobian(self, p, r, rng=None): output = [None for j in jac] for idx in range(len(output)): if jac[idx] is not None: - output[idx] = r * 0. + output[idx] = r * 0.0 output[idx][rng] = jac[idx] return output except TypeError: @@ -201,10 +219,10 @@ def jacobian(self, p, r, rng=None): def transform_derivatives(self, pars, in_format=None, out_format=None): """Return gradient matrix for pars converted from in_format to out_format. - Parameters - pars - Sequence of parameters - in_format - A format defined for this class - out_format - A format defined for this class + Parameters + pars - Sequence of parameters + in_format - A format defined for this class + out_format - A format defined for this class """ # Map unspecified formats to specific formats defined in default_formats if in_format is None: @@ -223,25 +241,29 @@ def transform_derivatives(self, pars, in_format=None, out_format=None): out_format = self.default_formats["default_input"] if not in_format in self.parformats: - raise ValueError("Argument 'in_format' must be one of %s." \ - % self.parformats) + raise ValueError( + "Argument 'in_format' must be one of %s." % self.parformats + ) if not out_format in self.parformats: - raise ValueError("Argument 'out_format' must be one of %s." \ - % self.parformats) + raise ValueError( + "Argument 'out_format' must be one of %s." % self.parformats + ) if in_format == out_format: return np.identity(self.npars) - return self._transform_derivativesraw(pars, in_format=in_format, out_format=out_format) + return self._transform_derivativesraw( + pars, in_format=in_format, out_format=out_format + ) def transform_parameters(self, pars, in_format=None, out_format=None): """Return new sequence with pars converted from in_format to out_format. - Also restores parameters to a preferred range if it permits multiple - values that correspond to the same physical result. + Also restores parameters to a preferred range if it permits multiple + values that correspond to the same physical result. - Parameters - pars - Sequence of parameters - in_format - A format defined for this class - out_format - A format defined for this class + Parameters + pars - Sequence of parameters + in_format - A format defined for this class + out_format - A format defined for this class """ # Map unspecified formats to specific formats defined in default_formats if in_format is None: @@ -260,15 +282,18 @@ def transform_parameters(self, pars, in_format=None, out_format=None): out_format = self.default_formats["default_input"] if not in_format in self.parformats: - raise ValueError("Argument 'in_format' must be one of %s." \ - % self.parformats) + raise ValueError( + "Argument 'in_format' must be one of %s." % self.parformats + ) if not out_format in self.parformats: - raise ValueError("Argument 'out_format' must be one of %s." \ - % self.parformats) - #if in_format == out_format: + raise ValueError( + "Argument 'out_format' must be one of %s." 
% self.parformats + ) + # if in_format == out_format: # return pars - return self._transform_parametersraw(pars, in_format=in_format, out_format=out_format) - + return self._transform_parametersraw( + pars, in_format=in_format, out_format=out_format + ) def value(self, p, r, rng=None): """Calculate value of ModelPart over r, possibly restricted by range. @@ -282,8 +307,10 @@ def value(self, p, r, rng=None): previously calculated values instead. """ if self is not p._owner: - emsg = "Argument 'p' must be evaluated by the BaseFunction "+\ - "subclass which owns it." + emsg = ( + "Argument 'p' must be evaluated by the BaseFunction " + + "subclass which owns it." + ) raise ValueError(emsg) # normally r will be a sequence, but also allow single numeric values @@ -291,7 +318,7 @@ def value(self, p, r, rng=None): if rng is None: rng = slice(0, len(r)) rpart = r[rng] - output = r * 0. + output = r * 0.0 output[rng] = self._valueraw(p.pars, rpart) return output except TypeError: @@ -338,17 +365,17 @@ def writestr(self, baselist): raise ValueError("emsg") lines = [] # Write function type - lines.append("function=%s" %repr(self.__class__.__name__)) - lines.append("module=%s" %repr(self.getmodule())) + lines.append("function=%s" % repr(self.__class__.__name__)) + lines.append("module=%s" % repr(self.getmodule())) # Write base if self.base is not None: - lines.append("base=%s" %repr(baselist.index(self.base))) + lines.append("base=%s" % repr(baselist.index(self.base))) else: - lines.append("base=%s" %repr(None)) + lines.append("base=%s" % repr(None)) # Write all other metadata for k, (v, f) in self.metadict.iteritems(): - lines.append("%s=%s" %(k, f(v))) - datastring = "\n".join(lines)+"\n" + lines.append("%s=%s" % (k, f(v))) + datastring = "\n".join(lines) + "\n" return datastring @staticmethod @@ -367,19 +394,19 @@ def factory(functionstr, baselist): # populate dictionary with parameter definition # "key=value"->{"key":"value"} - data = re.split(r'(?:[\r\n]+|\A)(\S+)=', data) + data = re.split(r"(?:[\r\n]+|\A)(\S+)=", data) ddict = {} - for i in range(len(data)/2): - ddict[data[2*i+1]] = data[2*i+2] + for i in range(len(data) / 2): + ddict[data[2 * i + 1]] = data[2 * i + 2] # dictionary of parameters pdict = {} - for (k, v) in ddict.items(): + for k, v in ddict.items(): try: pdict[k] = eval(v) except Exception as e: logger.exception(e) - emsg = ("Invalid parameter: %s=%s" %(k,v)) + emsg = "Invalid parameter: %s=%s" % (k, v) raise SrMiseDataFormatError(emsg) function_name = pdict["function"] @@ -438,9 +465,9 @@ def safefunction(f, fsafe): return -#end of class BaseFunction +# end of class BaseFunction -if __name__ == '__main__': +if __name__ == "__main__": from diffpy.srmise.peaks import GaussianOverR, TerminationRipples diff --git a/diffpy/srmise/dataclusters.py b/diffpy/srmise/dataclusters.py index 7428741..6d4f87e 100644 --- a/diffpy/srmise/dataclusters.py +++ b/diffpy/srmise/dataclusters.py @@ -21,6 +21,7 @@ logger = logging.getLogger("diffpy.srmise") + class DataClusters: """Find clusters corresponding to peaks in numerical x-, y-value arrays. @@ -51,7 +52,7 @@ def __init__(self, x, y, res): y - corresponding sequence of y-values res - clustering 'resolution' """ - #Track internal state of clustering. + # Track internal state of clustering. self.INIT = 0 self.READY = 1 self.CLUSTERING = 2 @@ -62,14 +63,12 @@ def __init__(self, x, y, res): return - # This iterator operates not over found clusters, but over the process of # clustering. 
This behavior could cause confusion and should perhaps be # altered. def __iter__(self): return self - def clear(self): """Clear all members, including user data.""" self.x = np.array([]) @@ -85,7 +84,7 @@ def clear(self): def reset_clusters(self): """Reset all progress on clustering.""" - self.clusters = np.array([[self.data_order[-1],self.data_order[-1]]]) + self.clusters = np.array([[self.data_order[-1], self.data_order[-1]]]) self.current_idx = self.data_order.size - 1 self.lastcluster_idx = 0 self.lastpoint_idx = self.data_order[-1] @@ -102,7 +101,7 @@ def setdata(self, x, y, res): y - corresponding sequence of y-values res - clustering 'resolution' """ - #Test for error conditions + # Test for error conditions # 1) Length mismatch # 2) Bound errors for res # 3) r isn't sorted? @@ -116,7 +115,7 @@ def setdata(self, x, y, res): self.y = y self.res = res - self.data_order = self.y.argsort() # Defines order of clustering + self.data_order = self.y.argsort() # Defines order of clustering self.clusters = np.array([[self.data_order[-1], self.data_order[-1]]]) self.current_idx = len(self.data_order) - 1 self.lastcluster_idx = 0 @@ -169,9 +168,10 @@ def next(self): self.lastcluster_idx = nearest_cluster[0] else: # insert right of nearest cluster - self.lastcluster_idx = nearest_cluster[0]+1 - self.clusters = np.insert(self.clusters, self.lastcluster_idx, - [test_idx, test_idx], 0) + self.lastcluster_idx = nearest_cluster[0] + 1 + self.clusters = np.insert( + self.clusters, self.lastcluster_idx, [test_idx, test_idx], 0 + ) return self def makeclusters(self): @@ -200,10 +200,10 @@ def find_nearest_cluster2(self, x): return self.find_nearest_cluster(idx) else: # Choose adjacent index nearest to x - if (self.x[idx] - x) < (x - self.x[idx-1]): + if (self.x[idx] - x) < (x - self.x[idx - 1]): return self.find_nearest_cluster(idx) else: - return self.find_nearest_cluster(idx-1) + return self.find_nearest_cluster(idx - 1) def find_nearest_cluster(self, idx): """Return [cluster index, distance] for cluster nearest to x[idx]. 
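The hunk below reformats find_nearest_cluster(), which locates the cluster nearest a data index by bisecting a flattened array of cluster endpoints. Note that the reformatted line `near_idx = flat_idx / 2` keeps true division, which yields a float index under Python 3; `flat_idx // 2` is presumably intended (the same caveat applies to `range(len(data) / 2)` in the basefunction.py hunk above). A minimal illustrative sketch of the lookup, with hypothetical data and integer division (not part of the patch):

import numpy as np

x = np.linspace(0.0, 10.0, 101)             # sorted x-values
clusters = np.array([[10, 20], [40, 55]])   # [first, last] index pairs

def nearest_cluster(idx):
    """Return (cluster index, signed x-distance) for data index idx."""
    flat = clusters.ravel()                  # sorted endpoint indices
    pos = flat.searchsorted(idx)
    near = pos // 2                          # two endpoints per cluster
    if pos == len(flat):                     # right of the last cluster
        return near - 1, x[idx] - x[clusters[-1, 1]]
    if flat[pos] == idx or pos % 2 == 1:     # idx falls inside a cluster
        return near, 0.0
    if pos == 0:                             # left of the first cluster
        return near, x[idx] - x[clusters[0, 0]]
    d_left = x[idx] - x[clusters[near - 1, 1]]
    d_right = x[idx] - x[clusters[near, 0]]
    return (near - 1, d_left) if d_left < abs(d_right) else (near, d_right)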
@@ -225,23 +225,27 @@ def find_nearest_cluster(self, idx): return None flat_idx = clusters_flat.searchsorted(idx) - near_idx = flat_idx/2 + near_idx = flat_idx / 2 if flat_idx == len(clusters_flat): - #test_idx is right of the last cluster - return [near_idx-1, self.x[idx]-self.x[self.clusters[-1, 1]]] - if clusters_flat[flat_idx] == idx or flat_idx%2 == 1: + # test_idx is right of the last cluster + return [near_idx - 1, self.x[idx] - self.x[self.clusters[-1, 1]]] + if clusters_flat[flat_idx] == idx or flat_idx % 2 == 1: # idx is within some cluster return [near_idx, 0.0] if flat_idx == 0: # idx is left of the first cluster - return [near_idx, self.x[idx]-self.x[self.clusters[0,0]]] + return [near_idx, self.x[idx] - self.x[self.clusters[0, 0]]] # Calculate which of the two nearest clusters is closer - distances=np.array([self.x[idx]-self.x[self.clusters[near_idx-1, 1]], - self.x[idx]-self.x[self.clusters[near_idx, 0]]]) + distances = np.array( + [ + self.x[idx] - self.x[self.clusters[near_idx - 1, 1]], + self.x[idx] - self.x[self.clusters[near_idx, 0]], + ] + ) if distances[0] < np.abs(distances[1]): - return [near_idx-1, distances[0]] + return [near_idx - 1, distances[0]] else: return [near_idx, distances[1]] @@ -255,15 +259,17 @@ def cluster_is_full(self, cluster_idx): cluster_idx - The index of the cluster to test """ if cluster_idx > 0: - low = self.clusters[cluster_idx-1, 1] + 1 + low = self.clusters[cluster_idx - 1, 1] + 1 else: low = 0 if cluster_idx < len(self.clusters) - 1: - high = self.clusters[cluster_idx+1, 0] - 1 + high = self.clusters[cluster_idx + 1, 0] - 1 else: high = len(self.data_order) - 1 - return self.clusters[cluster_idx, 0] == low \ - and self.clusters[cluster_idx, 1] == high + return ( + self.clusters[cluster_idx, 0] == low + and self.clusters[cluster_idx, 1] == high + ) def combine_clusters(self, combine): """Combine clusters specified by each subarray of cluster indices. @@ -283,15 +289,35 @@ def combine_clusters(self, combine): # Test that all clusters are contiguous and adjacent first = c[0] for i in range(c[0], c[-1]): - if c[i+1-first]-1 != c[i-first]: - raise ValueError(''.join(["Clusters ", str(c[i]), " and ", str(c[i+1]), " are not contiguous and/or increasing."])) - if self.clusters[i+1, 0]-self.clusters[i, 1] != 1: - raise ValueError(''.join(["Clusters ", str(c[i]), " and ", str(c[i+1]), " have unclustered points between them."])) - - #update cluster endpoints + if c[i + 1 - first] - 1 != c[i - first]: + raise ValueError( + "".join( + [ + "Clusters ", + str(c[i]), + " and ", + str(c[i + 1]), + " are not contiguous and/or increasing.", + ] + ) + ) + if self.clusters[i + 1, 0] - self.clusters[i, 1] != 1: + raise ValueError( + "".join( + [ + "Clusters ", + str(c[i]), + " and ", + str(c[i + 1]), + " have unclustered points between them.", + ] + ) + ) + + # update cluster endpoints self.clusters[c[0], 1] = self.clusters[c[-1], 1] todelete = np.array([c[1:] for c in combine]).ravel() - self.clusters = np.delete(self.clusters, todelete ,0) + self.clusters = np.delete(self.clusters, todelete, 0) def find_adjacent_clusters(self): """Return all cluster indices with no unclustered points between them. 
@@ -306,20 +332,26 @@ def find_adjacent_clusters(self): adj = [] left_idx = 0 - while left_idx < len(self.clusters)-1: - while left_idx < len(self.clusters)-1 and self.clusters[left_idx+1, 0] - self.clusters[left_idx, 1] !=1: + while left_idx < len(self.clusters) - 1: + while ( + left_idx < len(self.clusters) - 1 + and self.clusters[left_idx + 1, 0] - self.clusters[left_idx, 1] != 1 + ): left_idx += 1 # Not left_idx+1 since left_idx=len(self.clusters)-2 even if no # clusters are actually adjacent. right_idx = left_idx - while right_idx < len(self.clusters)-1 and self.clusters[right_idx+1, 0] - self.clusters[right_idx, 1] == 1: + while ( + right_idx < len(self.clusters) - 1 + and self.clusters[right_idx + 1, 0] - self.clusters[right_idx, 1] == 1 + ): right_idx += 1 if right_idx > left_idx: - adj.append(range(left_idx, right_idx+1)) - left_idx = right_idx+1 # set for next possible left_idx + adj.append(range(left_idx, right_idx + 1)) + left_idx = right_idx + 1 # set for next possible left_idx return np.array(adj) def cut(self, idx): @@ -331,24 +363,23 @@ def cut(self, idx): data_ids = self.clusters[idx] if len(data_ids) == data_ids.size: # idx is a scalar, so give single slice object - return slice(data_ids[0], data_ids[1]+1) + return slice(data_ids[0], data_ids[1] + 1) else: # idx is a list/slice, so give list of slice objects - return [slice(c[0], c[1]+1) for c in data_ids] + return [slice(c[0], c[1] + 1) for c in data_ids] def cluster_boundaries(self): """Return sequence with (x,y) of all cluster boundaries.""" boundaries = [] for l in self.clusters: - xlo = np.mean(self.x[l[0]-1:l[0]+1]) - ylo = np.mean(self.y[l[0]-1:l[0]+1]) - xhi = np.mean(self.x[l[1]:l[1]+2]) - yhi = np.mean(self.y[l[1]:l[1]+2]) + xlo = np.mean(self.x[l[0] - 1 : l[0] + 1]) + ylo = np.mean(self.y[l[0] - 1 : l[0] + 1]) + xhi = np.mean(self.x[l[1] : l[1] + 2]) + yhi = np.mean(self.y[l[1] : l[1] + 2]) boundaries.append((xlo, ylo)) boundaries.append((xhi, yhi)) return np.unique(boundaries) - def plot(self, *args, **kwds): """Plot the data with vertical lines at the cluster divisions. 
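The cut() hunk above converts the stored inclusive [first, last] index pairs into half-open Python slices. A quick sketch of that convention, with made-up indices (not SrMise code):

import numpy as np

clusters = np.array([[10, 20], [40, 55]])        # inclusive endpoint pairs
y = np.arange(100)

one = slice(clusters[0][0], clusters[0][1] + 1)  # single cluster -> one slice
many = [slice(c[0], c[1] + 1) for c in clusters] # several -> list of slices

assert len(y[one]) == 11                         # both endpoints included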
@@ -362,7 +393,7 @@ def plot(self, *args, **kwds): boundaries = self.cluster_boundaries() (ymin, ymax) = ax.get_ylim() for b in boundaries: - plt.axvline(b[0], 0, (b[1]-ymin)/(ymax-ymin), color='k') + plt.axvline(b[0], 0, (b[1] - ymin) / (ymax - ymin), color="k") plt.ion() ax.figure.canvas.draw() return @@ -386,13 +417,13 @@ def animate(self): canvas.restore_region(background) boundaries = self.cluster_boundaries() for i, b in enumerate(boundaries): - height = (b[1]-ymin)/(ymax-ymin) + height = (b[1] - ymin) / (ymax - ymin) if i < len(all_lines): all_lines[i].set_xdata([b[0], b[0]]) all_lines[i].set_ydata([0, height]) ax.draw_artist(all_lines[i]) else: - l = plt.axvline(b[0], 0, height, color='k', animated=True) + l = plt.axvline(b[0], 0, height, color="k", animated=True) ax.draw_artist(l) all_lines.append(l) canvas.blit(ax.bbox) @@ -404,21 +435,42 @@ def animate(self): self.status = status return -#End of class DataClusters - -# simple test code -if __name__ == '__main__': +# End of class DataClusters - x = np.array([-2., -1.5, -1., -0.5, 0., 0.5, 1., 1.5, 2., 2.5, 3., 3.5, 4., 4.5, 5.]) - y = np.array([0.0183156, 0.105399, 0.36788, 0.778806, 1.00012, 0.780731, 0.386195, 0.210798, 0.386195, 0.780731, 1.00012, 0.778806, 0.36788, 0.105399, 0.0183156]) - testcluster = DataClusters(x, y, .1) +# simple test code +if __name__ == "__main__": + + x = np.array( + [-2.0, -1.5, -1.0, -0.5, 0.0, 0.5, 1.0, 1.5, 2.0, 2.5, 3.0, 3.5, 4.0, 4.5, 5.0] + ) + y = np.array( + [ + 0.0183156, + 0.105399, + 0.36788, + 0.778806, + 1.00012, + 0.780731, + 0.386195, + 0.210798, + 0.386195, + 0.780731, + 1.00012, + 0.778806, + 0.36788, + 0.105399, + 0.0183156, + ] + ) + + testcluster = DataClusters(x, y, 0.1) testcluster.makeclusters() print(testcluster.clusters) adj = testcluster.find_adjacent_clusters() print(adj) - if len(adj) >0: + if len(adj) > 0: testcluster.combine_clusters(adj) print(testcluster.clusters) diff --git a/diffpy/srmise/modelcluster.py b/diffpy/srmise/modelcluster.py index a579f3e..1b4bfe0 100644 --- a/diffpy/srmise/modelcluster.py +++ b/diffpy/srmise/modelcluster.py @@ -57,8 +57,10 @@ class ModelCovariance(object): def __init__(self, *args, **kwds): """Intialize object.""" - self.cov = None # The raw covariance matrix - self.model = None # ModelParts instance, so both peaks and baseline (if present) + self.cov = None # The raw covariance matrix + self.model = ( + None # ModelParts instance, so both peaks and baseline (if present) + ) # Map i->[n1,n2,...] of the jth ModelPart to the n_i parameters in cov. self.mmap = {} @@ -96,11 +98,14 @@ def setcovariance(self, model, cov): emsg = "Parameter 'cov' must be a square matrix." 
raise ValueError(emsg) - - if tempcov.shape[0] != model.npars(True) and tempcov.shape[0] != model.npars(False): - emsg = ["Parameter 'cov' must be an nxn matrix, where n is equal to the number of free ", - "parameters in the model, or the total number of parameters (fixed and free) of ", - "the model."] + if tempcov.shape[0] != model.npars(True) and tempcov.shape[0] != model.npars( + False + ): + emsg = [ + "Parameter 'cov' must be an nxn matrix, where n is equal to the number of free ", + "parameters in the model, or the total number of parameters (fixed and free) of ", + "the model.", + ] raise ValueError("".join(emsg)) self.model = model.copy() @@ -113,8 +118,8 @@ def setcovariance(self, model, cov): for i, m in enumerate(model): self.mmap[i] = n + np.arange(m.npars(True)) for j, p in enumerate(m): - self.pmap[(i,j)] = n - self.ipmap[n] = (i,j) + self.pmap[(i, j)] = n + self.ipmap[n] = (i, j) n += 1 if n == tempcov.shape[0]: @@ -122,21 +127,20 @@ def setcovariance(self, model, cov): self.cov = tempcov else: # Create new covariance matrix, making sure to account for fixed pars - self.cov = np.matrix(np.zeros((n,n))) + self.cov = np.matrix(np.zeros((n, n))) - i=0 - rawi=0 + i = 0 + rawi = 0 for i in range(n): j = 0 rawj = 0 if free[i]: for j in range(n): if free[j]: - self.cov[i,j] = cov[rawi,rawj] + self.cov[i, j] = cov[rawi, rawj] rawj += 1 rawi += 1 - def transform(self, in_format, out_format, **kwds): """Transform parameters and covariance matrix under specified change of variables. @@ -168,7 +172,7 @@ def transform(self, in_format, out_format, **kwds): if "parts" in kwds: if kwds["parts"] == "peaks": - parts = range(len(self.model)-1) + parts = range(len(self.model) - 1) elif kwds["parts"] == "baseline": parts = [-1] else: @@ -189,33 +193,41 @@ def transform(self, in_format, out_format, **kwds): for i in parts: start = self.mmap[i][0] - stop = self.mmap[i][-1]+1 + stop = self.mmap[i][-1] + 1 p = self.model[i] try: subg = p.owner().transform_derivatives(p.pars, in_format, out_format) except NotImplementedError: - logger.warning("Transformation gradient not implemented for part %i: %s. Ignoring transformation." %(i, str(p))) + logger.warning( + "Transformation gradient not implemented for part %i: %s. Ignoring transformation." + % (i, str(p)) + ) subg = np.identity(p.npars(True)) except Exception as e: - logger.warning("Transformation gradient failed for part %i: %s. Failed with message %s. Ignoring transformation." %(i, str(p), str(e))) + logger.warning( + "Transformation gradient failed for part %i: %s. Failed with message %s. Ignoring transformation." + % (i, str(p), str(e)) + ) subg = np.identity(p.npars(True)) # Now transform the parameters to match try: p.pars = p.owner().transform_parameters(p.pars, in_format, out_format) except Exception as e: - logger.warning("Parameter transformation failed for part %i: %s. Failed with message %s. Ignoring transformation." %(i, str(p), str(e))) + logger.warning( + "Parameter transformation failed for part %i: %s. Failed with message %s. Ignoring transformation." 
+ % (i, str(p), str(e)) + ) subg = np.identity(p.npars(True)) # Update the global gradient matrix g[start:stop, start:stop] = subg g = np.matrix(g) - self.cov = np.array(g*np.matrix(self.cov).transpose()*g) + self.cov = np.array(g * np.matrix(self.cov).transpose() * g) return - def getcorrelation(self, i, j): """Return the correlation between variables i and j, Corr_ij=Cov_ij/(sigma_i sigma_j) @@ -233,10 +245,12 @@ def getcorrelation(self, i, j): i1 = self.pmap[i] if i in self.pmap else i j1 = self.pmap[j] if j in self.pmap else j - if self.cov[i1,i1] == 0. or self.cov[j1,j1] == 0.: - return 0. # Avoiding undefined quantities is sensible in this context. + if self.cov[i1, i1] == 0.0 or self.cov[j1, j1] == 0.0: + return 0.0 # Avoiding undefined quantities is sensible in this context. else: - return self.cov[i1,j1]/(np.sqrt(self.cov[i1,i1])*np.sqrt(self.cov[j1,j1])) + return self.cov[i1, j1] / ( + np.sqrt(self.cov[i1, i1]) * np.sqrt(self.cov[j1, j1]) + ) def getvalue(self, i): """Return value of parameter i. @@ -254,7 +268,7 @@ def getuncertainty(self, i): which indicate the mth parameter of modelpart l. """ (l, m) = i if i in self.pmap else self.ipmap[i] - return np.sqrt(self.getcovariance(i,i)) + return np.sqrt(self.getcovariance(i, i)) def getcovariance(self, i, j): """Return the covariance between variables i and j. @@ -270,7 +284,7 @@ def getcovariance(self, i, j): i1 = self.pmap[i] if i in self.pmap else i j1 = self.pmap[j] if j in self.pmap else j - return self.cov[i1,j1] + return self.cov[i1, j1] def get(self, i): """Return (value, uncertainty) tuple for parameter i. @@ -298,9 +312,9 @@ def correlationwarning(self, threshold=0.8): correlated = [] for i in range(self.cov.shape[0]): - for j in range(i+1, self.cov.shape[0]): - c = self.getcorrelation(i,j) - if c and np.abs(c) > threshold: # filter out None values + for j in range(i + 1, self.cov.shape[0]): + c = self.getcorrelation(i, j) + if c and np.abs(c) > threshold: # filter out None values correlated.append((self.ipmap[i], self.ipmap[j], c)) return correlated @@ -310,7 +324,7 @@ def __str__(self): return "Model and/or Covariance matrix undefined." lines = [] for i, m in enumerate(self.model): - lines.append(" ".join([self.prettypar((i,j)) for j in range(len(m))])) + lines.append(" ".join([self.prettypar((i, j)) for j in range(len(m))])) return "\n".join(lines) def prettypar(self, i): @@ -322,11 +336,12 @@ def prettypar(self, i): if self.model is None or self.cov is None: return "Model and/or Covariance matrix undefined." k = i if i in self.ipmap else self.pmap[i] - return "%.5e (%.5e)" %(self.getvalue(k), np.sqrt(self.getcovariance(k,k))) + return "%.5e (%.5e)" % (self.getvalue(k), np.sqrt(self.getcovariance(k, k))) # End of class ModelCovariance + class ModelCluster(object): """Associate a contiguous cluster of data with an appropriate model. 
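The ModelCovariance.transform() hunks above propagate the covariance matrix through a change of variables using the per-part gradients, and getcorrelation() normalizes it to Corr_ij = Cov_ij/(sigma_i sigma_j). The textbook first-order propagation is cov' = G cov G^T; the reformatted line computes g * cov.T * g, which coincides with that only when g is symmetric, so the upstream expression may deserve a second look. A sketch with synthetic numbers (not the SrMise API):

import numpy as np

cov = np.array([[0.04, 0.01],          # synthetic covariance of (p0, p1)
                [0.01, 0.09]])
G = np.array([[1.0, 0.0],              # Jacobian of the reparameterization,
              [2.0, 1.0]])             # G[i, j] = d new_i / d old_j

cov_new = G @ cov @ G.T                # first-order propagation

sigma = np.sqrt(np.diag(cov_new))
corr = cov_new / np.outer(sigma, sigma)  # Corr_ij = Cov_ij / (s_i * s_j)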
@@ -399,7 +414,7 @@ def __init__(self, model, *args, **kwds): self.error_method = orig.error_method self.peak_funcs = list(orig.peak_funcs) return - else: # Explicit creation + else: # Explicit creation if model is None: self.model = Peaks([]) else: @@ -468,26 +483,28 @@ def writestr(self, **kwds): if self.peak_funcs is None: lines.append("peak_funcs=None") else: - lines.append("peak_funcs=%s" %repr([pfbaselist.index(p) for p in self.peak_funcs])) + lines.append( + "peak_funcs=%s" % repr([pfbaselist.index(p) for p in self.peak_funcs]) + ) if self.error_method is None: - lines.append('ModelEvaluator=None') + lines.append("ModelEvaluator=None") else: - lines.append('ModelEvaluator=%s' %self.error_method.__name__) + lines.append("ModelEvaluator=%s" % self.error_method.__name__) - lines.append("slice=%s" %repr(self.slice)) + lines.append("slice=%s" % repr(self.slice)) # Indexed baseline functions (unless externally provided) if writeblf: lines.append("## BaselineFunctions") for i, bf in enumerate(blfbaselist): - lines.append('# BaselineFunction %s' %i) + lines.append("# BaselineFunction %s" % i) lines.append(bf.writestr(blfbaselist)) # Indexed peak functions (unless externally provided) if writepf: lines.append("## PeakFunctions") for i, pf in enumerate(pfbaselist): - lines.append('# PeakFunction %s' %i) + lines.append("# PeakFunction %s" % i) lines.append(pf.writestr(pfbaselist)) lines.append("# BaselineObject") @@ -501,17 +518,16 @@ def writestr(self, **kwds): lines.append("None") else: for m in self.model: - lines.append('# ModelPeak') + lines.append("# ModelPeak") lines.append(m.writestr(pfbaselist)) # Raw data in modelcluster. - lines.append('### start data') - lines.append('#L r y dy') + lines.append("### start data") + lines.append("#L r y dy") for i in range(len(self.r_data)): - lines.append('%g %g %g' % \ - (self.r_data[i], self.y_data[i], self.y_error[i]) ) + lines.append("%g %g %g" % (self.r_data[i], self.y_data[i], self.y_error[i])) - datastring = "\n".join(lines)+"\n" + datastring = "\n".join(lines) + "\n" return datastring @staticmethod @@ -545,101 +561,98 @@ def factory(mcstr, **kwds): # - StartData # find data section, and what information it contains - res = re.search(r'^#+ start data\s*(?:#.*\s+)*', mcstr, re.M) + res = re.search(r"^#+ start data\s*(?:#.*\s+)*", mcstr, re.M) if res: - start_data = mcstr[res.end():].strip() - start_data_info = mcstr[res.start():res.end()] - header = mcstr[:res.start()] - res = re.search(r'^(#+L.*)$', start_data_info, re.M) + start_data = mcstr[res.end() :].strip() + start_data_info = mcstr[res.start() : res.end()] + header = mcstr[: res.start()] + res = re.search(r"^(#+L.*)$", start_data_info, re.M) if res: - start_data_info = start_data_info[res.start():res.end()].strip() + start_data_info = start_data_info[res.start() : res.end()].strip() hasr = False hasy = False hasdy = False - res = re.search(r'\br\b', start_data_info) + res = re.search(r"\br\b", start_data_info) if res: hasr = True - res = re.search(r'\by\b', start_data_info) + res = re.search(r"\by\b", start_data_info) if res: hasy = True - res = re.search(r'\bdy\b', start_data_info) + res = re.search(r"\bdy\b", start_data_info) if res: hasdy = True # Model - res = re.search(r'^#+ ModelPeaks.*$', header, re.M) + res = re.search(r"^#+ ModelPeaks.*$", header, re.M) if res: - model_peaks = header[res.end():].strip() - header = header[:res.start()] + model_peaks = header[res.end() :].strip() + header = header[: res.start()] # Baseline Object - res = re.search(r'^#+ 
BaselineObject\s*(?:#.*\s+)*', header, re.M) + res = re.search(r"^#+ BaselineObject\s*(?:#.*\s+)*", header, re.M) if res: - baselineobject = header[res.end():].strip() - header = header[:res.start()] + baselineobject = header[res.end() :].strip() + header = header[: res.start()] # Peak functions if readpf: - res = re.search(r'^#+ PeakFunctions.*$', header, re.M) + res = re.search(r"^#+ PeakFunctions.*$", header, re.M) if res: - peakfunctions = header[res.end():].strip() - header = header[:res.start()] + peakfunctions = header[res.end() :].strip() + header = header[: res.start()] # Baseline functions if readblf: - res = re.search(r'^#+ BaselineFunctions.*$', header, re.M) + res = re.search(r"^#+ BaselineFunctions.*$", header, re.M) if res: - baselinefunctions = header[res.end():].strip() - header = header[:res.start()] + baselinefunctions = header[res.end() :].strip() + header = header[: res.start()] ### Instantiating baseline functions if readblf: blfbaselist = [] - res = re.split(r'(?m)^#+ BaselineFunction \d+\s*(?:#.*\s+)*', baselinefunctions) + res = re.split( + r"(?m)^#+ BaselineFunction \d+\s*(?:#.*\s+)*", baselinefunctions + ) for s in res[1:]: blfbaselist.append(BaseFunction.factory(s, blfbaselist)) ### Instantiating peak functions if readpf: pfbaselist = [] - res = re.split(r'(?m)^#+ PeakFunction \d+\s*(?:#.*\s+)*', peakfunctions) + res = re.split(r"(?m)^#+ PeakFunction \d+\s*(?:#.*\s+)*", peakfunctions) for s in res[1:]: pfbaselist.append(BaseFunction.factory(s, pfbaselist)) - ### Instantiating header data # peak_funcs - res = re.search(r'^peak_funcs=(.*)$', header, re.M) + res = re.search(r"^peak_funcs=(.*)$", header, re.M) peak_funcs = eval(res.groups()[0].strip()) if peak_funcs is not None: peak_funcs = [pfbaselist[i] for i in peak_funcs] # error_method - res = re.search(r'^ModelEvaluator=(.*)$', header, re.M) + res = re.search(r"^ModelEvaluator=(.*)$", header, re.M) __import__("diffpy.srmise.modelevaluators") module = sys.modules["diffpy.srmise.modelevaluators"] error_method = getattr(module, res.groups()[0].strip()) # slice - res = re.search(r'^slice=(.*)$', header, re.M) + res = re.search(r"^slice=(.*)$", header, re.M) cluster_slice = eval(res.groups()[0].strip()) - ### Instantiating BaselineObject - if re.match(r'^None$', baselineobject): + if re.match(r"^None$", baselineobject): baseline = None else: baseline = Baseline.factory(baselineobject, blfbaselist) - ### Instantiating model model = Peaks() - res = re.split(r'(?m)^#+ ModelPeak\s*(?:#.*\s+)*', model_peaks) + res = re.split(r"(?m)^#+ ModelPeak\s*(?:#.*\s+)*", model_peaks) for s in res[1:]: model.append(Peak.factory(s, pfbaselist)) - - ### Instantiating start data # read actual data - r, y, dy arrays = [] @@ -663,7 +676,10 @@ def factory(mcstr, **kwds): for line in start_data.split("\n"): l = line.split() if len(arrays) != len(l): - emsg = ("Number of value fields does not match that given by '%s'" %start_data_info) + emsg = ( + "Number of value fields does not match that given by '%s'" + % start_data_info + ) for a, v in zip(arrays, line.split()): a.append(float(v)) except (ValueError, IndexError) as err: @@ -675,8 +691,16 @@ def factory(mcstr, **kwds): if hasdy: y_error = np.array(y_error) - return ModelCluster(model, baseline, r_data, y_data, y_error, cluster_slice, error_method, peak_funcs) - + return ModelCluster( + model, + baseline, + r_data, + y_data, + y_error, + cluster_slice, + error_method, + peak_funcs, + ) @staticmethod def join_adjacent(m1, m2): @@ -724,11 +748,11 @@ def join_adjacent(m1, m2): if not 
right_ids[0] == left_ids[1]: raise ValueError("Given ModelClusters are not adjacent.") - new_slice=slice(left_ids[0], right_ids[1], 1) + new_slice = slice(left_ids[0], right_ids[1], 1) # Approximately where the clusters meet. - border_x = .5*(left.r_data[left_ids[1]-1] + right.r_data[right_ids[0]]) - border_y = .5*(left.y_data[left_ids[1]-1] + right.y_data[right_ids[0]]) + border_x = 0.5 * (left.r_data[left_ids[1] - 1] + right.r_data[right_ids[0]]) + border_y = 0.5 * (left.y_data[left_ids[1] - 1] + right.y_data[right_ids[0]]) if len(m1.model) > 0 and len(m2.model) > 0: new_model = left.model.copy() @@ -741,24 +765,40 @@ def join_adjacent(m1, m2): # border_x are removed. The highly unlikely case of two peaks # exactly at the border is also handled. for i in reversed(range(len(new_model))): - if new_model[i]["position"] == border_x and \ - i > 0 and new_model[i-1]["position"] == border_x: + if ( + new_model[i]["position"] == border_x + and i > 0 + and new_model[i - 1]["position"] == border_x + ): del new_model[i] elif new_ids[i] != i: - if (new_model[i]["position"] > border_x and new_ids[i] < len(left.model)) and \ - (new_model[i]["position"] < border_x and new_ids[i] >= len(left.model)): + if ( + new_model[i]["position"] > border_x + and new_ids[i] < len(left.model) + ) and ( + new_model[i]["position"] < border_x + and new_ids[i] >= len(left.model) + ): del new_model[i] # Likely to improve any future fitting new_model.match_at(border_x, border_y) elif len(m1.model) > 0: new_model = m1.model.copy() - else: # Only m2 has entries, or both are empty + else: # Only m2 has entries, or both are empty new_model = m2.model.copy() - peak_funcs = list(set(m1.peak_funcs) | set(m2.peak_funcs)) # "Union" - return ModelCluster(new_model, m1.baseline, m1.r_data, m1.y_data, - m1.y_error, new_slice, m1.error_method, peak_funcs) + peak_funcs = list(set(m1.peak_funcs) | set(m2.peak_funcs)) # "Union" + return ModelCluster( + new_model, + m1.baseline, + m1.r_data, + m1.y_data, + m1.y_error, + new_slice, + m1.error_method, + peak_funcs, + ) def change_slice(self, new_slice): """Change the slice which represents the extent of a cluster.""" @@ -786,11 +826,15 @@ def change_slice(self, new_slice): # check if slice has expanded on the left if self.never_fit and self.slice.start < old_slice.start: left_slice = slice(self.slice.start, old_slice.start) - self.never_fit = max(y_data_nobl[left_slice] - self.y_error[left_slice]) < 0 + self.never_fit = ( + max(y_data_nobl[left_slice] - self.y_error[left_slice]) < 0 + ) # check if slice has expanded on the right if self.never_fit and self.slice.stop > old_slice.stop: right_slice = slice(old_slice.stop, self.slice.stop) - self.never_fit = max(y_data_nobl[right_slice] - self.y_error[right_slice]) < 0 + self.never_fit = ( + max(y_data_nobl[right_slice] - self.y_error[right_slice]) < 0 + ) return @@ -808,7 +852,7 @@ def npars(self, count_baseline=True, count_fixed=True): n += self.baseline.npars(count_fixed=count_fixed) return n - def replacepeaks(self, newpeaks, delslice=slice(0,0)): + def replacepeaks(self, newpeaks, delslice=slice(0, 0)): """Replace peaks given by delslice by those in newpeaks. Parameters @@ -824,7 +868,7 @@ def replacepeaks(self, newpeaks, delslice=slice(0,0)): def deletepeak(self, idx): """Delete the peak at the given index.""" - self.replacepeaks([], slice(idx,idx+1)) + self.replacepeaks([], slice(idx, idx + 1)) def estimatepeak(self): """Attempt to add single peak to empty cluster. 
Return True if successful.""" @@ -840,18 +884,27 @@ def estimatepeak(self): # throw some exception pass selected = self.peak_funcs[0] - estimate = selected.estimate_parameters(self.r_cluster, self.y_cluster - self.valuebl()) - + estimate = selected.estimate_parameters( + self.r_cluster, self.y_cluster - self.valuebl() + ) if estimate is not None: newpeak = selected.actualize(estimate, "internal") - logger.info("Estimate: %s" %newpeak) + logger.info("Estimate: %s" % newpeak) self.replacepeaks(Peaks([newpeak])) return True else: return False - def fit(self, justify=False, ntrials=0, fitbaseline=False, estimate=True, cov=None, cov_format="default_output"): + def fit( + self, + justify=False, + ntrials=0, + fitbaseline=False, + estimate=True, + cov=None, + cov_format="default_output", + ): """Perform a chi-square fit of the model to data in cluster. Parameters @@ -871,7 +924,7 @@ def fit(self, justify=False, ntrials=0, fitbaseline=False, estimate=True, cov=No if self.never_fit: return None if len(self.model) == 0: - #Attempt to add a first peak to the cluster + # Attempt to add a first peak to the cluster if estimate: try: self.estimatepeak() @@ -890,7 +943,11 @@ def fit(self, justify=False, ntrials=0, fitbaseline=False, estimate=True, cov=No orig_baseline = self.baseline.copy() self.last_fit_size = self.size - if fitbaseline and self.baseline is not None and self.baseline.npars(count_fixed=False) > 0: + if ( + fitbaseline + and self.baseline is not None + and self.baseline.npars(count_fixed=False) > 0 + ): y_datafit = self.y_data fmodel = ModelParts(self.model) fmodel.append(self.baseline) @@ -899,20 +956,28 @@ def fit(self, justify=False, ntrials=0, fitbaseline=False, estimate=True, cov=No fmodel = self.model try: - fmodel.fit(self.r_data, - y_datafit, - self.y_error, - self.slice, - ntrials, - cov, - cov_format) + fmodel.fit( + self.r_data, + y_datafit, + self.y_error, + self.slice, + ntrials, + cov, + cov_format, + ) except SrMiseFitError as e: - logger.debug("Error while fitting cluster: %s\nReverting to original model.", e) + logger.debug( + "Error while fitting cluster: %s\nReverting to original model.", e + ) self.model = orig_model self.baseline = orig_baseline return None - if fitbaseline and self.baseline is not None and self.baseline.npars(count_fixed=False) > 0: + if ( + fitbaseline + and self.baseline is not None + and self.baseline.npars(count_fixed=False) > 0 + ): self.model = Peaks(fmodel[:-1]) self.baseline = fmodel[-1] else: @@ -925,17 +990,15 @@ def fit(self, justify=False, ntrials=0, fitbaseline=False, estimate=True, cov=No # Test for fit improvement if new_qual < orig_qual: # either fit blew up (and leastsq didn't notice) or the fit had already converged. - msg = ["ModelCluster.fit() warning: fit seems not to have improved.", - "Reverting to original model.", - "----------", - "New Quality: %s", - "Original Quality: %s" - "%s", - "----------"] - logger.debug("\n".join(msg), - new_qual.stat, - orig_qual.stat, - self.model) + msg = [ + "ModelCluster.fit() warning: fit seems not to have improved.", + "Reverting to original model.", + "----------", + "New Quality: %s", + "Original Quality: %s" "%s", + "----------", + ] + logger.debug("\n".join(msg), new_qual.stat, orig_qual.stat, self.model) self.model = orig_model self.baseline = orig_baseline @@ -953,9 +1016,11 @@ def fit(self, justify=False, ntrials=0, fitbaseline=False, estimate=True, cov=No # original fit is less likely to obscure any hidden peaks. 
if justify and len(self.model) == 1 and len(orig_model) > 0: min_npars = min([p.npars for p in self.peak_funcs]) - if new_qual.growth_justified(self, min_npars): - msg = ["ModelCluster.fit(): Fit over current cluster better explained by additional peaks.", - "Reverting to original model."] + if new_qual.growth_justified(self, min_npars): + msg = [ + "ModelCluster.fit(): Fit over current cluster better explained by additional peaks.", + "Reverting to original model.", + ] logger.debug("\n".join(msg)) self.model = orig_model @@ -974,28 +1039,35 @@ def contingent_fit(self, minpoints, growth_threshold): """ if self.never_fit: return None - if (self.last_fit_size > 0 and float(self.size)/self.last_fit_size >= growth_threshold) \ - or (self.last_fit_size == 0 and self.size >= minpoints): + if ( + self.last_fit_size > 0 + and float(self.size) / self.last_fit_size >= growth_threshold + ) or (self.last_fit_size == 0 and self.size >= minpoints): return self.fit(justify=True) return None def cleanfit(self): """Remove poor-quality peaks in the fit. Return number removed.""" - #Find peaks located outside the cluster + # Find peaks located outside the cluster pos = np.array([p["position"] for p in self.model]) left_idx = pos.searchsorted(self.r_cluster[0]) right_idx = pos.searchsorted(self.r_cluster[-1]) outside_idx = range(0, left_idx) outside_idx.extend(range(right_idx, len(self.model))) - #inside_idx = range(left_idx, right_idx) + # inside_idx = range(left_idx, right_idx) # Identify outside peaks that contribute < error everywhere in cluster. # Must check entire cluster and not just nearest endpoint because not # every peak function can be assumed to have its greatest contribution # there, and errors are not necessarily constant. - outside_idx = [i for i in outside_idx \ - if (self.model[i].removable \ - and max(self.model[i].value(self.r_cluster) - self.error_cluster) < 0)] + outside_idx = [ + i + for i in outside_idx + if ( + self.model[i].removable + and max(self.model[i].value(self.r_cluster) - self.error_cluster) < 0 + ) + ] # TODO: Check for peaks that have blown up. # Remember to check if a peak is removable. @@ -1003,10 +1075,14 @@ def cleanfit(self): # NaN is too serious not to remove, even if removable is False, but I should look # into better handling anyway. - nan_idx = [i for i in range(len(self.model)) if np.isnan(self.model[i].pars).any()] + nan_idx = [ + i for i in range(len(self.model)) if np.isnan(self.model[i].pars).any() + ] if len(outside_idx) > 0: - msg = ["Following peaks outside cluster made no contribution within it and were removed:"] + msg = [ + "Following peaks outside cluster made no contribution within it and were removed:" + ] msg.extend([str(self.model[i]) for i in outside_idx]) logger.debug("\n".join(msg)) @@ -1015,11 +1091,11 @@ def cleanfit(self): msg.extend([str(self.model[i]) for i in nan_idx]) logger.debug("\n".join(msg)) -# # TODO: Uncomment when there's a point! -# if len(blown_idx) > 0: -# msg = ["Following peaks inside cluster were too large and had to be removed:"] -# msg.extend([str(self.model[i]) for i in blown_idx]) -# logger.info("\n".join(msg)) + # # TODO: Uncomment when there's a point! + # if len(blown_idx) > 0: + # msg = ["Following peaks inside cluster were too large and had to be removed:"] + # msg.extend([str(self.model[i]) for i in blown_idx]) + # logger.info("\n".join(msg)) # A peak can only be removed once. 
to_remove = list(set(outside_idx) | set(blown_idx) | set(nan_idx)) @@ -1048,7 +1124,7 @@ def reduce_to(self, x, y): logger.debug("reduce_to: No reduction necessary.") return None orig_model = self.model.copy() - self.model.match_at(x, y-self.valuebl(x)) + self.model.match_at(x, y - self.valuebl(x)) quality = self.fit() # Did reduction help? @@ -1067,13 +1143,15 @@ def reduce_to(self, x, y): def value(self, r=None): """Return value of baseline+model over cluster.""" - if len(self.model)==0: + if len(self.model) == 0: return self.valuebl(r) else: if r is None: - return self.valuebl(r)+(self.model.value(self.r_data, self.slice)[self.slice]) + return self.valuebl(r) + ( + self.model.value(self.r_data, self.slice)[self.slice] + ) else: - return self.valuebl(r)+(self.model.value(r)) + return self.valuebl(r) + (self.model.value(r)) def valuebl(self, r=None): """Return baseline's value over cluster. @@ -1088,10 +1166,10 @@ def valuebl(self, r=None): if r is None: return np.zeros(self.size) else: - return r*0. + return r * 0.0 else: if r is None: - return (self.baseline.value(self.r_data, self.slice)[self.slice]) + return self.baseline.value(self.r_data, self.slice)[self.slice] else: return self.baseline.value(r) @@ -1130,9 +1208,9 @@ def plottable(self, joined=False): else: toreturn = [self.r_cluster, self.y_cluster] bl = self.valuebl() - toreturn.extend([self.r_cluster for i in range(2*len(self.model))]) + toreturn.extend([self.r_cluster for i in range(2 * len(self.model))]) for i, p in enumerate(self.model): - toreturn[2*i+3] = bl + p.value(self.r_data, self.slice)[self.slice] + toreturn[2 * i + 3] = bl + p.value(self.r_data, self.slice)[self.slice] return toreturn def plottable_residual(self): @@ -1149,16 +1227,15 @@ def augment(self, source): best_qual = self.quality() source_model = source.model.copy() - msg = ["==== Augmenting model ====", - "Original fit:", - "%s", - "w/ quality: %s", - "New model fits:", - "%s"] - logger.debug("\n".join(msg), - best_model, - best_qual.stat, - source_model) + msg = [ + "==== Augmenting model ====", + "Original fit:", + "%s", + "w/ quality: %s", + "New model fits:", + "%s", + ] + logger.debug("\n".join(msg), best_model, best_qual.stat, source_model) # Each iteration improves best_model by adding the peak from # source_model to best_model that most improves its quality, breaking @@ -1181,28 +1258,27 @@ def augment(self, source): best_model = test_models[args[-1]] del source_model[args[-1]] else: - break # Best possible model has been found. + break # Best possible model has been found. self.replacepeaks(best_model, slice(len(self.model))) # TODO: Do I need this? If test_model contains peaks # by reference, the fit peaks will change as well. self.fit() - msg = ["Best model after fit is:", - "%s", - "w/ quality: %s", - "================="] - logger.debug("\n".join(msg), - self.model, - best_qual.stat) + msg = ["Best model after fit is:", "%s", "w/ quality: %s", "================="] + logger.debug("\n".join(msg), self.model, best_qual.stat) return def __str__(self): """Return string representation of the cluster.""" - return '\n'.join(["Slice: %s" %self.slice, - "Quality: %s" %self.quality().stat, - "Baseline: %s" %self.baseline, - "Peaks:\n%s" %self.model]) + return "\n".join( + [ + "Slice: %s" % self.slice, + "Quality: %s" % self.quality().stat, + "Baseline: %s" % self.baseline, + "Peaks:\n%s" % self.model, + ] + ) def prune(self): """Remove peaks until model quality no longer improves. 
@@ -1223,10 +1299,19 @@ def prune(self): tracer.pushc() y_nobl = self.y_cluster - self.valuebl() - prune_mc = ModelCluster(None, None, self.r_cluster, y_nobl, self.error_cluster, None, self.error_method, self.peak_funcs) + prune_mc = ModelCluster( + None, + None, + self.r_cluster, + y_nobl, + self.error_cluster, + None, + self.error_method, + self.peak_funcs, + ) orig_model = self.model.copy() - peak_range = 3 # number of peaks on either side of deleted peak to fit + peak_range = 3 # number of peaks on either side of deleted peak to fit check_models = [] for m in orig_model: if m.removable: @@ -1237,16 +1322,11 @@ def prune(self): best_model = self.model.copy() best_qual = self.quality() - msg = ["====Pruning fits:====", - "Original model:", - "%s", - "w/ quality: %s"] - logger.info("\n".join(msg), - best_model, - best_qual.stat) + msg = ["====Pruning fits:====", "Original model:", "%s", "w/ quality: %s"] + logger.info("\n".join(msg), best_model, best_qual.stat) #### Main prune loop #### - while(check_models.count(None) < len(check_models)): + while check_models.count(None) < len(check_models): # Cache value of individual peaks for best current model. best_modely = [] @@ -1271,57 +1351,50 @@ def prune(self): for i in range(len(check_models)): if check_models[i] is not None: # Create model with ith peak removed, and distant peaks effectively fixed - lo = max(i-peak_range, 0) - hi = min(i+peak_range+1, len(best_model)) + lo = max(i - peak_range, 0) + hi = min(i + peak_range + 1, len(best_model)) check_models[i] = best_model[lo:i].copy() - check_models[i].extend(best_model[i+1:hi].copy()) + check_models[i].extend(best_model[i + 1 : hi].copy()) prune_mc.model = check_models[i] - msg = ["len(check_models): %s", - "len(best_model): %s", - "i: %s"] - logger.debug("\n".join(msg), - len(check_models), - len(best_model), - i) - - addpars = best_model.npars() - check_models[i].npars() - best_model[i].npars(count_fixed=False) + msg = ["len(check_models): %s", "len(best_model): %s", "i: %s"] + logger.debug("\n".join(msg), len(check_models), len(best_model), i) + addpars = ( + best_model.npars() + - check_models[i].npars() + - best_model[i].npars(count_fixed=False) + ) # Remove contribution of (effectively) fixed peaks y = np.array(y_nobl) if lo > 0: - logger.debug("len(sum): %s", len(np.sum(best_modely[:lo], axis=0))) + logger.debug( + "len(sum): %s", len(np.sum(best_modely[:lo], axis=0)) + ) y -= np.sum(best_modely[:lo], axis=0) if hi < len(best_modely): y -= np.sum(best_modely[hi:], axis=0) prune_mc.y_data = y prune_mc.y_cluster = y - msg = ["", - "--- %s ---", - "Removed peak: %s", - "Starting model:", - "%s"] - logger.debug("\n".join(msg), - i, - best_model[i], - prune_mc.model) + msg = [ + "", + "--- %s ---", + "Removed peak: %s", + "Starting model:", + "%s", + ] + logger.debug("\n".join(msg), i, best_model[i], prune_mc.model) - prune_mc.fit(ntrials=int(np.sqrt(len(y))+50), estimate=False) + prune_mc.fit(ntrials=int(np.sqrt(len(y)) + 50), estimate=False) qual = prune_mc.quality(kshift=addpars) check_qual = np.append(check_qual, qual) check_qualidx = np.append(check_qualidx, i) - msg = ["Found model:", - "%s", - "addpars: %s", - "qual: %s"] - logger.debug("\n".join(msg), - prune_mc.model, - addpars, - qual.stat) + msg = ["Found model:", "%s", "addpars: %s", "qual: %s"] + logger.debug("\n".join(msg), prune_mc.model, addpars, qual.stat) # Do not check this peak in the future if quality decreased. 
if qual < best_qual: @@ -1329,21 +1402,22 @@ def prune(self): arg = check_qual.argsort() - msg = [" - Finished round of pruning -", - "best_qual: %s", - "check_qual: %s", - "sorted check_qual: %s"] - logger.debug("\n".join(msg), - best_qual.stat, - [c.stat for c in check_qual], - arg) + msg = [ + " - Finished round of pruning -", + "best_qual: %s", + "check_qual: %s", + "sorted check_qual: %s", + ] + logger.debug( + "\n".join(msg), best_qual.stat, [c.stat for c in check_qual], arg + ) arg = arg[-1] newbest_qual = check_qual[arg] newbest_qualidx = check_qualidx[arg] if newbest_qual > best_qual: - lo = max(newbest_qualidx-peak_range, 0) - hi = min(newbest_qualidx+peak_range+1, len(orig_model)) + lo = max(newbest_qualidx - peak_range, 0) + hi = min(newbest_qualidx + peak_range + 1, len(orig_model)) bmtemp = best_model[:lo] bmtemp.extend(check_models[newbest_qualidx]) bmtemp.extend(best_model[hi:]) @@ -1360,12 +1434,8 @@ def prune(self): self.model = best_model tracer.emit(self) - msg = ["New best model:", - "%s", - "best_qual: %s"] - logger.debug("\n".join(msg), - best_model, - best_qual.stat) + msg = ["New best model:", "%s", "best_qual: %s"] + logger.debug("\n".join(msg), best_model, best_qual.stat) if len(best_model) > 0: del check_models[newbest_qualidx] @@ -1378,38 +1448,39 @@ def prune(self): else: break - msg = ["Best model after pruning is:", - "%s", - "w/ quality: %s", - "================="] - logger.info("\n".join(msg), - self.model, - self.quality().stat) + msg = [ + "Best model after pruning is:", + "%s", + "w/ quality: %s", + "=================", + ] + logger.info("\n".join(msg), self.model, self.quality().stat) tracer.popc() return + # simple test code -if __name__ == '__main__': +if __name__ == "__main__": from numpy.random import randn from diffpy.srmise.modelevaluators import AICc from diffpy.srmise.peaks import GaussianOverR - pf = GaussianOverR(.7) - res = .01 + pf = GaussianOverR(0.7) + res = 0.01 - pars = [[3, .2, 10], [3.5, .2, 10]] + pars = [[3, 0.2, 10], [3.5, 0.2, 10]] ideal_peaks = Peaks([pf.actualize(p, "pwa") for p in pars]) - r = np.arange(2,4,res) + r = np.arange(2, 4, res) y = ideal_peaks.value(r) + randn(len(r)) err = np.ones(len(r)) evaluator = AICc() - guesspars = [[2.9, .15, 5], [3.6, .3, 5]] + guesspars = [[2.9, 0.15, 5], [3.6, 0.3, 5]] guess_peaks = Peaks([pf.actualize(p, "pwa") for p in guesspars]) cluster = ModelCluster(guess_peaks, None, r, y, err, None, AICc, [pf]) diff --git a/diffpy/srmise/modelparts.py b/diffpy/srmise/modelparts.py index fbf8dea..4943e0a 100644 --- a/diffpy/srmise/modelparts.py +++ b/diffpy/srmise/modelparts.py @@ -34,8 +34,8 @@ # Before it returned a scalar, later it returned an array of length 1. import pkg_resources as pr -__spv__ = pr.get_distribution('scipy').version -__oldleastsqbehavior__ = (pr.parse_version(__spv__) < pr.parse_version('0.8.0')) +__spv__ = pr.get_distribution("scipy").version +__oldleastsqbehavior__ = pr.parse_version(__spv__) < pr.parse_version("0.8.0") class ModelParts(list): @@ -57,7 +57,16 @@ class ModelParts(list): def __init__(self, *args, **kwds): list.__init__(self, *args, **kwds) - def fit(self, r, y, y_error, range=None, ntrials=0, cov=None, cov_format="default_output"): + def fit( + self, + r, + y, + y_error, + range=None, + ntrials=0, + cov=None, + cov_format="default_output", + ): """Chi-square fit of all free parameters to given data. There must be at least as many free parameters as data points. 
@@ -75,12 +84,17 @@ def fit(self, r, y, y_error, range=None, ntrials=0, cov=None, cov_format="defaul
         """
         freepars = self.unpack_freepars()
         if len(freepars) >= len(r):
-            emsg = "Cannot fit model with " + str(len(freepars)) +\
-                   " free parametersbut only "+str(len(r)) + " data points."
+            emsg = (
+                "Cannot fit model with "
+                + str(len(freepars))
+                + " free parameters but only "
+                + str(len(r))
+                + " data points."
+            )
             raise SrMiseFitError(emsg)
         if len(freepars) == 0:
-            #emsg = "Cannot fit model with no free parameters."
-            #raise SrMiseFitError(emsg)
+            # emsg = "Cannot fit model with no free parameters."
+            # raise SrMiseFitError(emsg)
             return
 
         if range == None:
@@ -95,38 +109,48 @@ def fit(self, r, y, y_error, range=None, ntrials=0, cov=None, cov_format="defaul
             plt.cla()
             plt.title("Before")
             plt.plot(r, y, label="_nolabel_")
-            plt.plot(r, (y-self.value(r, range=range))-1.1*(max(y) - min(y)), label="_nolabel_")
+            plt.plot(
+                r,
+                (y - self.value(r, range=range)) - 1.1 * (max(y) - min(y)),
+                label="_nolabel_",
+            )
             for p in self:
                 plt.plot(r, p.value(r, range=range), label=str(p))
             plt.ion()
 
         try:
             f = leastsq(
-                self.residual, # minimize this function
-                freepars, # initial parameters
-                args=args, # arguments to residual, residual_jacobian
-                Dfun=self.residual_jacobian, # explicit Jacobian
-                col_deriv=1, # order of derivatives in Jacobian
+                self.residual,  # minimize this function
+                freepars,  # initial parameters
+                args=args,  # arguments to residual, residual_jacobian
+                Dfun=self.residual_jacobian,  # explicit Jacobian
+                col_deriv=1,  # order of derivatives in Jacobian
                 full_output=1,
-                maxfev=ntrials)
+                maxfev=ntrials,
+            )
         except NotImplementedError:
             # TODO: Figure out if is worth checking for residual_jacobian
             # before leastsq(). This exception will either occur almost never
             # or extremely frequently, and the extra evaluations will add up.
             logger.info("One or more functions do not define residual_jacobian().")
             f = leastsq(
-                self.residual, # minimize this function
-                freepars, # initial parameters
-                args=args, # arguments to residual
-                col_deriv=1, # order of derivatives in Jacobian
+                self.residual,  # minimize this function
+                freepars,  # initial parameters
+                args=args,  # arguments to residual
+                col_deriv=1,  # order of derivatives in Jacobian
                 full_output=1,
-                maxfev=ntrials)
+                maxfev=ntrials,
+            )
         except Exception:
             # Sadly, KeyboardInterrupt, etc. is reraised as minpack.error
             # Not much I can do about that, though.
             import traceback
-            emsg = "Unexpected error in modelparts.fit(). Original exception:\n" +\
-                   traceback.format_exc() + "End original exception."
+
+            emsg = (
+                "Unexpected error in modelparts.fit(). Original exception:\n"
+                + traceback.format_exc()
+                + "End original exception."
+ ) raise SrMiseFitError(emsg) result = f[0] @@ -144,22 +168,34 @@ def fit(self, r, y, y_error, range=None, ntrials=0, cov=None, cov_format="defaul plt.cla() plt.title("After") plt.ion() - plt.plot(r, y, - r, (y-self.value(r, range=range))-1.1*(max(y) - min(y)), - *[i for sublist in [[r, p.value(r, range=range)] for p in self] for i in sublist]) + plt.plot( + r, + y, + r, + (y - self.value(r, range=range)) - 1.1 * (max(y) - min(y)), + *[ + i + for sublist in [[r, p.value(r, range=range)] for p in self] + for i in sublist + ] + ) plt.draw() if srmiselog.wait: - print("Press 'Enter' to continue...",) + print( + "Press 'Enter' to continue...", + ) input() - if f[4] not in (1,2,3,4): + if f[4] not in (1, 2, 3, 4): emsg = "Fit did not succeed -- " + str(f[3]) raise SrMiseFitError(emsg) # clean up parameters for p in self: - p.pars = p.owner().transform_parameters(p.pars, in_format="internal", out_format="internal") + p.pars = p.owner().transform_parameters( + p.pars, in_format="internal", out_format="internal" + ) # Supply estimated covariance matrix if requested. # The precise relationship between f[1] and estimated covariance matrix is a little unclear from @@ -169,28 +205,28 @@ def fit(self, r, y, y_error, range=None, ntrials=0, cov=None, cov_format="defaul pcov = f[1] fvec = f[2]["fvec"] dof = len(r) - len(freepars) - cov.setcovariance(self, pcov*np.sum(fvec**2)/dof) + cov.setcovariance(self, pcov * np.sum(fvec**2) / dof) try: cov.transform(in_format="internal", out_format=cov_format) except SrMiseUndefinedCovarianceError as e: logger.warn("Covariance not defined. Fit may not have converged.") - return -#### Notes on the fit f -# f[0] = solution -# f[1] = Uses the fjac and ipvt optional outputs to construct an estimate of the jacobian around the solution. -# None if a singular matrix encountered (indicates very flat curvature in some direction). -# This matrix must be multiplied by the residual variance to get the covariance of the parameter -# estimates - see curve fit. -# f[2] = dictionary{nfev: int, fvec: array(), fjac: array(), ipvt: array(), qtf: array()} -# nfev - The number of function calls made -# fvec - function (residual) evaluated at solution -# fjac - "a permutation of the R matrix of a QR factorization of the final Jacobian." -# ipvt - integer array defining a permutation matrix P such that fjac*P=QR -# qtf - transpose(q)*fvec -# f[3] = message about results of fit -# f[4] = integer flag. Fit was successful on 1,2,3, or 4. Otherwise unsuccessful. + + #### Notes on the fit f + # f[0] = solution + # f[1] = Uses the fjac and ipvt optional outputs to construct an estimate of the jacobian around the solution. + # None if a singular matrix encountered (indicates very flat curvature in some direction). + # This matrix must be multiplied by the residual variance to get the covariance of the parameter + # estimates - see curve fit. + # f[2] = dictionary{nfev: int, fvec: array(), fjac: array(), ipvt: array(), qtf: array()} + # nfev - The number of function calls made + # fvec - function (residual) evaluated at solution + # fjac - "a permutation of the R matrix of a QR factorization of the final Jacobian." + # ipvt - integer array defining a permutation matrix P such that fjac*P=QR + # qtf - transpose(q)*fvec + # f[3] = message about results of fit + # f[4] = integer flag. Fit was successful on 1,2,3, or 4. Otherwise unsuccessful. def npars(self, count_fixed=True): """Return total number of parameters in all parts. 
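The reindented notes above describe scipy.optimize.leastsq's full_output tuple; ModelParts.fit() scales f[1] by the residual variance sum(fvec**2)/dof to obtain the parameter covariance. A self-contained sketch of that recipe on a toy line fit (none of these names are SrMise API):

import numpy as np
from scipy.optimize import leastsq

x = np.linspace(0.0, 1.0, 50)
y = 2.0 * x + 1.0 + 0.05 * np.random.randn(len(x))

def residual(p):
    return y - (p[0] * x + p[1])

p, pcov, info, mesg, ier = leastsq(residual, [1.0, 0.0], full_output=1)
if ier in (1, 2, 3, 4) and pcov is not None:       # flags meaning "success"
    dof = len(x) - len(p)
    cov = pcov * np.sum(info["fvec"] ** 2) / dof   # scale by residual variance
    uncertainties = np.sqrt(np.diag(cov))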
@@ -201,7 +237,7 @@ def npars(self, count_fixed=True): """ n = 0 for p in self: - n+=p.npars(count_fixed=count_fixed) + n += p.npars(count_fixed=count_fixed) return n def pack_freepars(self, freepars): @@ -229,9 +265,9 @@ def residual(self, freepars, r, y_expected, y_error, range=None): try: if range is None: range = slice(0, len(r)) - return (y_expected[range]-total[range])/y_error[range] + return (y_expected[range] - total[range]) / y_error[range] except TypeError: - return (y_expected-total)/y_error + return (y_expected - total) / y_error def residual_jacobian(self, freepars, r, y_expected, y_error, range=None): """Calculate the Jacobian of freepars. @@ -245,22 +281,24 @@ def residual_jacobian(self, freepars, r, y_expected, y_error, range=None): All the data by default. """ if len(freepars) == 0: - raise ValueError("Argument freepars has length 0. The Jacobian " - "is only defined with >=1 free parameters.") + raise ValueError( + "Argument freepars has length 0. The Jacobian " + "is only defined with >=1 free parameters." + ) self.pack_freepars(freepars) - tempJac=[] + tempJac = [] for p in self: - tempJac[len(tempJac):] = p.jacobian(r, range) + tempJac[len(tempJac) :] = p.jacobian(r, range) # Since the residual is (expected - calculated) the jacobian # of the residual has a minus sign. - jac=-np.array([j for j in tempJac if j is not None]) + jac = -np.array([j for j in tempJac if j is not None]) try: if range is None: range = slice(0, len(r)) - return jac[:,range]/y_error[range] + return jac[:, range] / y_error[range] except TypeError: - return jac/y_error + return jac / y_error def value(self, r, range=None): """Calculate total value of all parts over range. @@ -270,14 +308,14 @@ def value(self, r, range=None): range - Slice object specifying region of r and y over which to fit. All the data by default. """ - total = r * 0. + total = r * 0.0 for p in self: total += p.value(r, range) return total def unpack_freepars(self): """Return array of all free parameters.""" - #To check: ravel() sometimes returns a reference and othertimes a copy. + # To check: ravel() sometimes returns a reference and othertimes a copy. # Do I need to use flatten() instead? return np.concatenate([p.compress() for p in self]).ravel() @@ -302,16 +340,16 @@ def covariance(self, format="internal", **kwds): try: idx = int(k[1:]) except ValueError: - emsg = "Invalid format keyword '%s'. They must be specified as 'f0', 'f1', etc." %k + emsg = ( + "Invalid format keyword '%s'. They must be specified as 'f0', 'f1', etc." + % k + ) raise ValueError(emsg) formats[int(k[1:])] = v - - return - def copy(self): """Return deep copy of this ModelParts. @@ -321,7 +359,7 @@ def copy(self): def __str__(self): """Return string representation of this ModelParts.""" - return ''.join([str(p)+"\n" for p in self]) + return "".join([str(p) + "\n" for p in self]) def __getslice__(self, i, j): """Extends list.__getslice__""" @@ -338,10 +376,15 @@ def transform(self, in_format="internal", out_format="internal"): try: p.pars = p.owner().transform_parameters(p.pars, in_format, out_format) except ValueError: - logger.info("Invalid parameter transformation: Ignoring %s->%s for function of type %s." %(in_format, out_format, p.owner().getmodule())) + logger.info( + "Invalid parameter transformation: Ignoring %s->%s for function of type %s." + % (in_format, out_format, p.owner().getmodule()) + ) + # End of class ModelParts + class ModelPart(object): """Represents a single part (instance of some function) of a model. 
@@ -388,18 +431,22 @@ def __init__(self, owner, pars, free=None, removable=True, static_owner=False): self._owner = owner if len(pars) != owner.npars: - emsg = "The length of pars must equal the number of parameters "+\ - "specified by the model part owner." + emsg = ( + "The length of pars must equal the number of parameters " + + "specified by the model part owner." + ) raise ValueError(emsg) - self.pars = np.array(pars[:]) # pars[:] in case pars is a ModelPart + self.pars = np.array(pars[:]) # pars[:] in case pars is a ModelPart if free is None: self.free = np.array([True for p in pars], dtype=bool) else: self.free = np.array(free, dtype=bool) if len(self.free) != owner.npars: - emsg = "The length of free must be equal to the number of "+\ - "parameters specified by the model part owner." + emsg = ( + "The length of free must be equal to the number of " + + "parameters specified by the model part owner." + ) raise ValueError(emsg) self.removable = removable @@ -419,8 +466,10 @@ def changeowner(self, owner): emsg = "Cannot change owner if static_owner is True." raise SrMiseStaticOwnerError(emsg) if self._owner.npars != owner.npars: - emsg = "New owner specifies different number of parameters than "+\ - "original owner." + emsg = ( + "New owner specifies different number of parameters than " + + "original owner." + ) raise SrMiseStaticOwnerError(emsg) self._owner = owner @@ -453,8 +502,8 @@ def update(self, freepars): """ numfree = self.npars(count_fixed=False) if len(freepars) < numfree: - pass # raise "freepars does not have enough elements to - # update every unheld parameter." + pass # raise "freepars does not have enough elements to + # update every unheld parameter." # TODO: Check if I need to make copies here, or if references # to parameters are safe. self.pars[self.free] = freepars[:numfree] @@ -475,7 +524,9 @@ def copy(self): The original and the copy are completely independent, except they both reference the same owner.""" - return type(self).__call__(self._owner, self.pars, self.free, self.removable, self.static_owner) + return type(self).__call__( + self._owner, self.pars, self.free, self.removable, self.static_owner + ) def __getitem__(self, key_or_idx): """Return parameter of peak corresponding with key_or_idx. @@ -529,20 +580,26 @@ def npars(self, count_fixed=True): def __str__(self): """Return string representation of ModelPart parameters.""" - return str(self._owner.transform_parameters(self.pars, in_format="internal", out_format="default_output")) + return str( + self._owner.transform_parameters( + self.pars, in_format="internal", out_format="default_output" + ) + ) def __eq__(self, other): - """ """ + """ """ if hasattr(other, "_owner"): - return ((self._owner is other._owner) and - np.all(self.pars == other.pars) and - np.all(self.free == other.free) and - self.removable == other.removable) + return ( + (self._owner is other._owner) + and np.all(self.pars == other.pars) + and np.all(self.free == other.free) + and self.removable == other.removable + ) else: return False def __ne__(self, other): - """ """ + """ """ return not self == other def writestr(self, ownerlist): @@ -557,19 +614,20 @@ def writestr(self, ownerlist): emsg = "ownerlist does not contain this ModelPart's owner." 
raise ValueError(emsg) lines = [] - lines.append("owner=%s" %repr(ownerlist.index(self._owner))) - - #Lists/numpy arrays don't give full representation of long lists - lines.append("pars=[%s]" %", ".join([repr(p) for p in self.pars])) - lines.append("free=[%s]" %", ".join([repr(f) for f in self.free])) - lines.append("removable=%s" %repr(self.removable)) - lines.append("static_owner=%s" %repr(self.static_owner)) - datastring = "\n".join(lines)+"\n" + lines.append("owner=%s" % repr(ownerlist.index(self._owner))) + + # Lists/numpy arrays don't give full representation of long lists + lines.append("pars=[%s]" % ", ".join([repr(p) for p in self.pars])) + lines.append("free=[%s]" % ", ".join([repr(f) for f in self.free])) + lines.append("removable=%s" % repr(self.removable)) + lines.append("static_owner=%s" % repr(self.static_owner)) + datastring = "\n".join(lines) + "\n" return datastring + # End of class ModelPart # simple test code -if __name__ == '__main__': +if __name__ == "__main__": pass diff --git a/diffpy/srmise/multimodelselection.py b/diffpy/srmise/multimodelselection.py index 14b52be..01995e2 100644 --- a/diffpy/srmise/multimodelselection.py +++ b/diffpy/srmise/multimodelselection.py @@ -23,13 +23,15 @@ logger = logging.getLogger("diffpy.srmise") + def eatkwds(*args, **kwds): """Convenience function to remove all keywords in args from kwds.""" for k in args: if k in kwds: - print("Keyword %s=%s ignored." %(k, kwds.pop(k))) + print("Keyword %s=%s ignored." % (k, kwds.pop(k))) return kwds + class MultimodelSelection(PeakStability): """Quick and dirty multimodel selection using AIC and its offspring.""" @@ -52,7 +54,9 @@ def __init__(self): self.classweights = {} self.classprobs = {} self.sortedclassprobs = {} - self.sortedclasses = {} # dg->as self.classes, but with model indices sorted by best AIC + self.sortedclasses = ( + {} + ) # dg->as self.classes, but with model indices sorted by best AIC PeakStability.__init__(self) return @@ -67,7 +71,9 @@ def makeaics(self, dgs, dr, filename=None): nominal value. filename - Optional file to save pickled results """ - aics_out = {} # Version of self.aics that holds only the statistic, not the AIC object. + aics_out = ( + {} + ) # Version of self.aics that holds only the statistic, not the AIC object. self.dgs = np.array(dgs) for i, dg in enumerate(self.dgs): self.dgs_idx[dg] = i @@ -77,12 +83,11 @@ def makeaics(self, dgs, dr, filename=None): (r, y, dr, dy) = self.ppe.resampledata(dr) for model_idx in range(len(self.results)): - print("Testing model %s of %s." %(model_idx, len(self.results))) + print("Testing model %s of %s." % (model_idx, len(self.results))) result = self.results[model_idx] em = self.ppe.error_method - # This section dependent on spaghetti code elsewhere in srmise! # Short cut evaluation of AICs which doesn't require calculating chi-square # over and over again. This method assumes that the various uncertainties @@ -95,13 +100,17 @@ def makeaics(self, dgs, dr, filename=None): # modelevaluators subpackage are in need of a rewrite, and so it would be # best to do them all at once. 
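# (Illustrative aside, not part of this patch.)  Why the shortcut below is
# sound: with a uniform uncertainty dg the chi-square is
# sum(((y - f)/dg)**2), so switching from dg0 to dg rescales it by exactly
# (dg0/dg)**2 -- the residuals never need to be recomputed.
import numpy as np

y = np.array([1.0, 2.0, 3.0])
f = np.array([1.1, 1.9, 3.2])
dg0, dg = 0.1, 0.2
chisq0 = np.sum(((y - f) / dg0) ** 2)
chisq = np.sum(((y - f) / dg) ** 2)
assert np.isclose(chisq, chisq0 * (dg0 / dg) ** 2)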
dg0 = self.dgs[0] - mc = ModelCluster(result[1], result[2], r, y, dg0*np.ones(len(r)), None, em, self.ppe.pf) + mc = ModelCluster( + result[1], result[2], r, y, dg0 * np.ones(len(r)), None, em, self.ppe.pf + ) em0 = mc.quality() for dg in self.dgs: em_instance = em() - em_instance.chisq = em0.chisq*(dg0/dg)**2 # rescale chi-square - em_instance.evaluate(mc) # evaluate AIC without recalculating chi-square + em_instance.chisq = em0.chisq * (dg0 / dg) ** 2 # rescale chi-square + em_instance.evaluate( + mc + ) # evaluate AIC without recalculating chi-square self.aics[dg].append(em_instance) aics_out[dg].append(em_instance.stat) @@ -111,10 +120,10 @@ def makeaics(self, dgs, dr, filename=None): if filename is not None: try: - import cPickle as pickle + import cPickle as pickle except: - import pickle - out_s = open(filename, 'wb') + import pickle + out_s = open(filename, "wb") pickle.dump(aics_out, out_s) out_s.close() @@ -123,10 +132,10 @@ def makeaics(self, dgs, dr, filename=None): def loadaics(self, filename): """Load file containing results of the testall method.""" try: - import cPickle as pickle + import cPickle as pickle except: - import pickle - in_s = open(filename, 'rb') + import pickle + in_s = open(filename, "rb") aics_in = pickle.load(in_s) in_s.close() @@ -171,7 +180,7 @@ def makesortedprobs(self): for dg in self.dgs: self.sortedprobs[dg] = np.argsort(self.aicprobs[dg]).tolist() - def animate_probs(self, step=False, duration=0., **kwds): + def animate_probs(self, step=False, duration=0.0, **kwds): """Show animation of extracted peaks from first to last. Parameters: @@ -181,14 +190,17 @@ def animate_probs(self, step=False, duration=0., **kwds): Keywords passed to pyplot.plot()""" if duration > 0: import time - sleeptime = duration/len(self.dgs) + + sleeptime = duration / len(self.dgs) plt.ion() plt.subplot(211) best_idx = self.sortedprobs[self.dgs[0]][-1] - line, = plt.plot(self.dgs, self.aicprobs[self.dgs[0]]) + (line,) = plt.plot(self.dgs, self.aicprobs[self.dgs[0]]) vline = plt.axvline(self.dgs[0]) - dot, = plt.plot(self.dgs[best_idx],self.aicprobs[self.dgs[0]][best_idx],'ro') + (dot,) = plt.plot( + self.dgs[best_idx], self.aicprobs[self.dgs[0]][best_idx], "ro" + ) plt.subplot(212) self.setcurrent(best_idx) @@ -212,7 +224,7 @@ def animate_probs(self, step=False, duration=0., **kwds): if duration > 0: time.sleep(sleeptime) - def animate_classprobs(self, step=False, duration=0., **kwds): + def animate_classprobs(self, step=False, duration=0.0, **kwds): """Show animation of extracted peaks from first to last. 
Parameters: @@ -222,23 +234,31 @@ def animate_classprobs(self, step=False, duration=0., **kwds): Keywords passed to pyplot.plot()""" if duration > 0: import time - sleeptime = duration/len(self.dgs) + + sleeptime = duration / len(self.dgs) plt.ion() ax1 = plt.subplot(211) bestclass_idx = self.sortedclassprobs[self.dgs[0]][-1] best_idx = self.sortedclasses[self.dgs[0]][bestclass_idx][-1] - arrow_left = len(self.classes)-1 - arrow_right = arrow_left + .05*arrow_left - line, = plt.plot(range(len(self.classes)), self.classprobs[self.dgs[0]]) - dot, = plt.plot(self.dgs[best_idx],self.classprobs[self.dgs[0]][bestclass_idx],'ro') - plt.axvline(arrow_left, color='k') + arrow_left = len(self.classes) - 1 + arrow_right = arrow_left + 0.05 * arrow_left + (line,) = plt.plot(range(len(self.classes)), self.classprobs[self.dgs[0]]) + (dot,) = plt.plot( + self.dgs[best_idx], self.classprobs[self.dgs[0]][bestclass_idx], "ro" + ) + plt.axvline(arrow_left, color="k") ax2 = ax1.twinx() - ax2.set_ylim(self.dgs[0],self.dgs[-1]) + ax2.set_ylim(self.dgs[0], self.dgs[-1]) ax2.set_ylabel("dg") ax1.set_xlim(right=arrow_right) ax2.set_xlim(right=arrow_right) - dgarrow = ax2.annotate("",(arrow_right, self.dgs[0]), (arrow_left, self.dgs[0]), arrowprops=dict(arrowstyle="-|>")) + dgarrow = ax2.annotate( + "", + (arrow_right, self.dgs[0]), + (arrow_left, self.dgs[0]), + arrowprops=dict(arrowstyle="-|>"), + ) plt.subplot(212) self.setcurrent(best_idx) @@ -247,7 +267,7 @@ def animate_classprobs(self, step=False, duration=0., **kwds): minval = np.min(val[1::2]) [r, res] = tempcluster.plottable_residual() plt.plot(*val) - plt.plot(r, minval-np.max(res)+res) + plt.plot(r, minval - np.max(res) + res) for dg in self.dgs[1:]: plt.ioff() line.set_ydata(self.classprobs[dg]) @@ -263,7 +283,7 @@ def animate_classprobs(self, step=False, duration=0., **kwds): minval = np.min(val[1::2]) [r, res] = tempcluster.plottable_residual() plt.plot(*val) - plt.plot(r, minval-np.max(res)+res) + plt.plot(r, minval - np.max(res) + res) dot.set_xdata(bestclass_idx) dot.set_ydata(self.classprobs[dg][bestclass_idx]) plt.ion() @@ -295,16 +315,18 @@ def classify(self, r, tolerance=0.05): self.classes_idx = {} self.class_tolerance = None - classes = [] # each element is a list of the models (result indices) in the class - classes_idx = {} # given an integer corresponding to a model, return its class - epsqval = {} # holds the squared value of each class' exemplar peaks - ebsqval = {} # holds the squared value of each class exemplar baselines + classes = ( + [] + ) # each element is a list of the models (result indices) in the class + classes_idx = {} # given an integer corresponding to a model, return its class + epsqval = {} # holds the squared value of each class' exemplar peaks + ebsqval = {} # holds the squared value of each class exemplar baselines for i in range(len(self.results)): peaks = self.results[i][1] baseline = self.results[i][2] - bsqval = baseline.value(r)**2 - psqval = [p.value(r)**2 for p in peaks] + bsqval = baseline.value(r) ** 2 + psqval = [p.value(r) ** 2 for p in peaks] added_to_class = False for c in range(len(classes)): @@ -318,10 +340,10 @@ def classify(self, r, tolerance=0.05): continue # check peak types and number of parameters - badpeak=False + badpeak = False if len(peaks) != len(exemplar_peaks): continue - for p, ep in zip(peaks,exemplar_peaks): + for p, ep in zip(peaks, exemplar_peaks): if type(p) != type(ep): badpeak = True break @@ -333,19 +355,23 @@ def classify(self, r, tolerance=0.05): # check peak values 
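# (Illustrative aside, not part of this patch.)  The peak-value check that
# follows compares integrated squared peak profiles symmetrically: two
# models are considered different when |sum(p - ep)| exceeds the tolerance
# relative to EITHER peak, so the outcome does not depend on which model
# happens to be the class exemplar.
import numpy as np

def peaks_differ(p, ep, tolerance=0.05):
    # p, ep: arrays of squared peak values sampled on a common grid
    basediff = np.abs(np.sum(p - ep))
    return basediff > tolerance * np.sum(ep) or basediff > tolerance * np.sum(p)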
current_psqval = [] - for p, ep in zip(psqval,epsqval[c]): - basediff = np.abs(np.sum(p-ep)) - #if basediff > tolerance*np.sum(ep): - if basediff > tolerance*np.sum(ep) or basediff > tolerance*np.sum(p): + for p, ep in zip(psqval, epsqval[c]): + basediff = np.abs(np.sum(p - ep)) + # if basediff > tolerance*np.sum(ep): + if basediff > tolerance * np.sum( + ep + ) or basediff > tolerance * np.sum(p): badpeak = True break if badpeak: continue # check baseline values - basediff = np.abs(np.sum(bsqval-ebsqval[c])) - #if basediff > tolerance*np.sum(ebsqval[c]): - if basediff > tolerance*np.sum(ebsqval[c]) or basediff > tolerance*np.sum(bsqval): + basediff = np.abs(np.sum(bsqval - ebsqval[c])) + # if basediff > tolerance*np.sum(ebsqval[c]): + if basediff > tolerance * np.sum( + ebsqval[c] + ) or basediff > tolerance * np.sum(bsqval): continue # that's all the checks, add to current class @@ -357,7 +383,7 @@ def classify(self, r, tolerance=0.05): if added_to_class is False: # make a new class with the current model as exemplar classes.append([i]) - classnum = len(classes)-1 + classnum = len(classes) - 1 classes_idx[i] = classnum epsqval[classnum] = psqval ebsqval[classnum] = bsqval @@ -393,7 +419,9 @@ def makeclassweights(self): for dg in self.dgs: bestinclass = [cls[-1] for cls in self.sortedclasses[dg]] - self.classweights[dg] = em.akaikeweights([self.aics[dg][b] for b in bestinclass]) + self.classweights[dg] = em.akaikeweights( + [self.aics[dg][b] for b in bestinclass] + ) def makeclassprobs(self): self.classprobs = {} @@ -401,7 +429,9 @@ def makeclassprobs(self): for dg in self.dgs: bestinclass = [cls[-1] for cls in self.sortedclasses[dg]] - self.classprobs[dg] = em.akaikeprobs([self.aics[dg][b] for b in bestinclass]) + self.classprobs[dg] = em.akaikeprobs( + [self.aics[dg][b] for b in bestinclass] + ) def makesortedclassprobs(self): self.sortedclassprobs = {} @@ -411,7 +441,7 @@ def makesortedclassprobs(self): def dg_key(self, dg_in): """Return the dg value usable as a key nearest to dg_in.""" - idx = (np.abs(self.dgs-dg_in)).argmin() + idx = (np.abs(self.dgs - dg_in)).argmin() return self.dgs[idx] def bestclasses(self, dgs=None): @@ -489,7 +519,7 @@ def plot3dclassprobs(self, **kwds): from mpl_toolkits.mplot3d import Axes3D fig = kwds.pop("figure", plt.gcf()) - ax = fig.add_subplot(kwds.pop("subplot",111), projection='3d') + ax = fig.add_subplot(kwds.pop("subplot", 111), projection="3d") cbkwds = kwds.copy() @@ -497,16 +527,16 @@ def plot3dclassprobs(self, **kwds): dGs = kwds.pop("dGs", self.dgs) highlight = kwds.pop("highlight", []) classes = kwds.pop("classes", range(len(self.classes))) - probfilter = kwds.pop("probfilter", [0.,1.]) + probfilter = kwds.pop("probfilter", [0.0, 1.0]) class_size = kwds.pop("class_size", "number") norm = kwds.pop("norm", "auto") cmap = kwds.pop("cmap", cm.jet) highlight_cmap = kwds.pop("highlight_cmap", cm.gray) title = kwds.pop("title", True) p_alpha = kwds.pop("p_alpha", 0.7) - scale = kwds.pop("scale", 1.) 
+ scale = kwds.pop("scale", 1.0) - xs = dGs*scale + xs = dGs * scale verts = [] zs = [] zlabels = [] @@ -515,22 +545,22 @@ def plot3dclassprobs(self, **kwds): maxys = np.max(ys) if maxys >= probfilter[0] and maxys <= probfilter[1]: - p0, p1 = ((xs[0], 0), (xs[-1],0)) # points to close the vertices - verts.append(np.concatenate([[p0], zip(xs,ys), [p1], [p0]])) + p0, p1 = ((xs[0], 0), (xs[-1], 0)) # points to close the vertices + verts.append(np.concatenate([[p0], zip(xs, ys), [p1], [p0]])) zlabels.append(i) ### Define face colors fc = np.array([len(self.classes[z]) for z in zlabels]) if class_size is "fraction": - fc = fc/float(len(self.results)) + fc = fc / float(len(self.results)) # Index the colormap if necessary if class_size is "number": if norm is "auto": - indexedcolors = cmap(np.linspace(0., 1., np.max(fc))) + indexedcolors = cmap(np.linspace(0.0, 1.0, np.max(fc))) cmap = colors.ListedColormap(indexedcolors) elif norm is "full": - indexedcolors = cmap(np.linspace(0., 1., len(self.results))) + indexedcolors = cmap(np.linspace(0.0, 1.0, len(self.results))) cmap = colors.ListedColormap(indexedcolors) # A user-specified norm cannot be used to index a colormap. @@ -540,45 +570,59 @@ def plot3dclassprobs(self, **kwds): mic = np.min(fc) mac = np.max(fc) nc = mac - mic + 1 - norm = colors.BoundaryNorm(np.linspace(mic, mac+1, nc+1), nc) + norm = colors.BoundaryNorm(np.linspace(mic, mac + 1, nc + 1), nc) if class_size is "fraction": norm = colors.Normalize() norm.autoscale(fc) elif norm is "full": mcolor = len(self.results) if class_size is "number": - norm = colors.BoundaryNorm(np.linspace(0, mcolor+1, mcolor+2), mcolor+1) + norm = colors.BoundaryNorm( + np.linspace(0, mcolor + 1, mcolor + 2), mcolor + 1 + ) if class_size is "fraction": - norm = colors.Normalize(0., 1.) 
+ norm = colors.Normalize(0.0, 1.0) zs = np.arange(len(zlabels)) poly = PolyCollection(verts, facecolors=cmap(norm(fc)), closed=False) poly.set_alpha(p_alpha) - cax = ax.add_collection3d(poly, zs=zs, zdir='y') + cax = ax.add_collection3d(poly, zs=zs, zdir="y") # Highlight values of interest color_idx = np.linspace(0, 1, len(highlight)) for dG, ci in zip(highlight, color_idx): for z_logical, z_plot in zip(zlabels, zs): - ax.plot([dG, dG], [z_plot, z_plot], [0, self.classprobs[dG][z_logical]], color=highlight_cmap(ci), alpha=p_alpha) - - ax.set_xlabel('dG') - ax.set_xlim3d(dGs[0]*scale, dGs[-1]*scale) - ax.set_ylabel('Class') + ax.plot( + [dG, dG], + [z_plot, z_plot], + [0, self.classprobs[dG][z_logical]], + color=highlight_cmap(ci), + alpha=p_alpha, + ) + + ax.set_xlabel("dG") + ax.set_xlim3d(dGs[0] * scale, dGs[-1] * scale) + ax.set_ylabel("Class") ax.set_ylim3d(zs[0], zs[-1]) ax.set_yticks(zs) ax.set_yticklabels([str(z) for z in zlabels]) - ax.set_zlabel('Akaike probability') + ax.set_zlabel("Akaike probability") ax.set_zlim3d(0, 1) if title is True: - title = "Class probabilities\n\ + title = ( + "Class probabilities\n\ Max probabilities in %s\n\ - %i/%i classes with %i/%i models displayed"\ - %(probfilter, - len(zs), len(self.classes), - np.sum([len(self.classes[z]) for z in zlabels]), len(self.results) ) + %i/%i classes with %i/%i models displayed" + % ( + probfilter, + len(zs), + len(self.classes), + np.sum([len(self.classes[z]) for z in zlabels]), + len(self.results), + ) + ) if title is not False: figtitle = fig.suptitle(title) @@ -586,23 +630,26 @@ def plot3dclassprobs(self, **kwds): # Add colorbar if "cbpos" in kwds: cbpos = kwds.pop("cbpos") - aspect = cbpos[3]/cbpos[2] - plt.tight_layout() # do it before cbaxis, so colorbar is ignored. + aspect = cbpos[3] / cbpos[2] + plt.tight_layout() # do it before cbaxis, so colorbar is ignored. transAtoF = ax.transAxes + fig.transFigure.inverted() rect = transforms.Bbox.from_bounds(*cbpos).transformed(transAtoF).bounds cbaxis = fig.add_axes(rect) # Remove all colorbar.make_axes keywords except orientation - kwds = eatkwds("fraction", "pad", "shrink", "aspect", - "anchor", "panchor", **kwds) + kwds = eatkwds( + "fraction", "pad", "shrink", "aspect", "anchor", "panchor", **kwds + ) else: kwds.setdefault("shrink", 0.75) # In matplotlib 1.1.0 make_axes_gridspec ignores anchor and panchor keywords. # Eat these keywords for now. kwds = eatkwds("anchor", "panchor", **kwds) - cbaxis, kwds = colorbar.make_axes_gridspec(ax, **kwds) # gridspec allows tight_layout - plt.tight_layout() # do it after cbaxis, so colorbar isn't ignored + cbaxis, kwds = colorbar.make_axes_gridspec( + ax, **kwds + ) # gridspec allows tight_layout + plt.tight_layout() # do it after cbaxis, so colorbar isn't ignored cb = colorbar.ColorbarBase(cbaxis, cmap=cmap, norm=norm, **kwds) @@ -611,8 +658,7 @@ def plot3dclassprobs(self, **kwds): elif class_size is "fraction": cb.set_label("Fraction of models in class") - - return {"fig":fig, "axis":ax, "cb":cb, "cbaxis": cbaxis} + return {"fig": fig, "axis": ax, "cb": cb, "cbaxis": cbaxis} def get_model(self, dG, **kwds): """Return index of best model of best class at given dG. @@ -625,7 +671,8 @@ def get_model(self, dG, **kwds): corder - Which class to get based on AIC. Ordered from best to worst from 0 (the default). morder - Which model to get based on AIC. Ordered from best to worst from 0 (the default). Returns a model from a class, or from the collection of all models if classes are ignored. 
- cls - Override corder with a specific class index, or None to ignore classes entirely.""" + cls - Override corder with a specific class index, or None to ignore classes entirely. + """ corder = kwds.pop("corder", 0) morder = kwds.pop("morder", 0) if "cls" in kwds: @@ -634,9 +681,9 @@ def get_model(self, dG, **kwds): cls = self.get_class(dG, corder=corder) if cls is None: - return self.sortedprobs[dG][-1-morder] + return self.sortedprobs[dG][-1 - morder] else: - return self.sortedclasses[dG][cls][-1-morder] + return self.sortedclasses[dG][cls][-1 - morder] def get_class(self, dG, **kwds): """Return index of best class at given dG. @@ -646,9 +693,10 @@ def get_class(self, dG, **kwds): Keywords: - corder - Which class to get based on AIC. Ordered from best to worst from 0 (the default).""" + corder - Which class to get based on AIC. Ordered from best to worst from 0 (the default). + """ corder = kwds.pop("corder", 0) - return self.sortedclassprobs[dG][-1-corder] # index of corderth best class + return self.sortedclassprobs[dG][-1 - corder] # index of corderth best class def get_prob(self, dG, **kwds): """Return Akaike probability of best model of best class at given dG. @@ -661,7 +709,8 @@ def get_prob(self, dG, **kwds): corder - Which class to get based on AIC. Ordered from best to worst from 0 (the default). morder - Which model to get based on AIC. Ordered from best to worst from 0 (the default). Returns a model from a class, or from the collection of all models if classes are ignored. - cls - Override corder with a specific class index, or None to ignore classes entirely.""" + cls - Override corder with a specific class index, or None to ignore classes entirely. + """ idx = self.get_model(dG, **kwds) if "cls" in kwds and kwds["cls"] is None: return self.aicprobs[dG][idx] @@ -680,7 +729,8 @@ def get_nfree(self, dG, **kwds): corder - Which class to get based on AIC. Ordered from best to worst from 0 (the default). morder - Which model to get based on AIC. Ordered from best to worst from 0 (the default). Returns a model from a class, or from the collection of all models if classes are ignored. - cls - Override corder with a specific class index, or None to ignore classes entirely.""" + cls - Override corder with a specific class index, or None to ignore classes entirely. + """ idx = self.get_model(dG, **kwds) model = self.results[idx][1] baseline = self.results[idx][2] @@ -697,7 +747,8 @@ def get_aic(self, dG, **kwds): corder - Which class to get based on AIC. Ordered from best to worst from 0 (the default). morder - Which model to get based on AIC. Ordered from best to worst from 0 (the default). Returns a model from a class, or from the collection of all models if classes are ignored. - cls - Override corder with a specific class index, or None to ignore classes entirely.""" + cls - Override corder with a specific class index, or None to ignore classes entirely. + """ idx = self.get_model(dG, **kwds) return self.aics[dG][idx].stat @@ -714,13 +765,16 @@ def get(self, dG, *args, **kwds): corder - Which class to get based on AIC. Ordered from best to worst from 0 (the default). morder - Which model to get based on AIC. Ordered from best to worst from 0 (the default). Returns a model from a class, or from the collection of all models if classes are ignored. 
- cls - Override corder with a specific class index, or None to ignore classes entirely.""" - fdict = {"aic": self.get_aic, - "class": self.get_class, - "dg": lambda x: x, - "model": self.get_model, - "nfree": self.get_nfree, - "prob": self.get_prob} + cls - Override corder with a specific class index, or None to ignore classes entirely. + """ + fdict = { + "aic": self.get_aic, + "class": self.get_class, + "dg": lambda x: x, + "model": self.get_model, + "nfree": self.get_nfree, + "prob": self.get_prob, + } values = [] for a in args: values.append(fdict[a].__call__(dG, **kwds)) diff --git a/diffpy/srmise/pdfdataset.py b/diffpy/srmise/pdfdataset.py index 09a404f..feb64fa 100644 --- a/diffpy/srmise/pdfdataset.py +++ b/diffpy/srmise/pdfdataset.py @@ -31,6 +31,7 @@ class PDFComponent(object): """Common base class.""" + def __init__(self, name): """initialize @@ -38,13 +39,14 @@ def __init__(self, name): """ self.name = name - def close ( self, force = False ): + def close(self, force=False): """close myself force -- if forcibly (no wait) """ pass + class PDFDataSet(PDFComponent): """PDFDataSet is a class for experimental PDF data. @@ -75,9 +77,21 @@ class PDFDataSet(PDFComponent): refinableVars -- set (dict) of refinable variable names. """ - persistentItems = [ 'robs', 'Gobs', 'drobs', 'dGobs', 'stype', 'qmax', - 'qdamp', 'qbroad', 'dscale', 'rmin', 'rmax', 'metadata' ] - refinableVars = dict.fromkeys(('qdamp', 'qbroad', 'dscale')) + persistentItems = [ + "robs", + "Gobs", + "drobs", + "dGobs", + "stype", + "qmax", + "qdamp", + "qbroad", + "dscale", + "rmin", + "rmax", + "metadata", + ] + refinableVars = dict.fromkeys(("qdamp", "qbroad", "dscale")) def __init__(self, name): """Initialize. @@ -94,7 +108,7 @@ def clear(self): self.Gobs = [] self.drobs = [] self.dGobs = [] - self.stype = 'X' + self.stype = "X" # user must specify qmax to get termination ripples self.qmax = 0.0 self.qdamp = 0.001 @@ -149,16 +163,17 @@ def read(self, filename): returns self """ try: - self.readStr(open(filename,'rb').read()) + self.readStr(open(filename, "rb").read()) except PDFDataFormatError as err: basename = os.path.basename(filename) - emsg = ("Could not open '%s' due to unsupported file format " + - "or corrupted data. [%s]") % (basename, err) + emsg = ( + "Could not open '%s' due to unsupported file format " + + "or corrupted data. [%s]" + ) % (basename, err) raise SrMiseFileError(emsg) self.filename = os.path.abspath(filename) return self - def readStr(self, datastring): """read experimental PDF data from a string @@ -168,15 +183,15 @@ def readStr(self, datastring): """ self.clear() # useful regex patterns: - rx = { 'f' : r'[-+]?(\d+(\.\d*)?|\d*\.\d+)([eE][-+]?\d+)?' 
} + rx = {"f": r"[-+]?(\d+(\.\d*)?|\d*\.\d+)([eE][-+]?\d+)?"} # find where does the data start - res = re.search(r'^#+ start data\s*(?:#.*\s+)*', datastring, re.M) + res = re.search(r"^#+ start data\s*(?:#.*\s+)*", datastring, re.M) # start_data is position where the first data line starts if res: start_data = res.end() else: # find line that starts with a floating point number - regexp = r'^\s*%(f)s' % rx + regexp = r"^\s*%(f)s" % rx res = re.search(regexp, datastring, re.M) if res: start_data = res.start() @@ -186,18 +201,18 @@ def readStr(self, datastring): databody = datastring[start_data:].strip() # find where the metadata starts - metadata = '' - res = re.search(r'^#+\ +metadata\b\n', header, re.M) + metadata = "" + res = re.search(r"^#+\ +metadata\b\n", header, re.M) if res: - metadata = header[res.end():] - header = header[:res.start()] + metadata = header[res.end() :] + header = header[: res.start()] # parse header # stype - if re.search('(x-?ray|PDFgetX)', header, re.I): - self.stype = 'X' - elif re.search('(neutron|PDFgetN)', header, re.I): - self.stype = 'N' + if re.search("(x-?ray|PDFgetX)", header, re.I): + self.stype = "X" + elif re.search("(neutron|PDFgetN)", header, re.I): + self.stype = "N" # qmax regexp = r"\bqmax *= *(%(f)s)\b" % rx res = re.search(regexp, header, re.I) @@ -227,12 +242,12 @@ def readStr(self, datastring): regexp = r"\b(?:temp|temperature|T)\ *=\ *(%(f)s)\b" % rx res = re.search(regexp, header) if res: - self.metadata['temperature'] = float(res.groups()[0]) + self.metadata["temperature"] = float(res.groups()[0]) # doping regexp = r"\b(?:x|doping)\ *=\ *(%(f)s)\b" % rx res = re.search(regexp, header) if res: - self.metadata['doping'] = float(res.groups()[0]) + self.metadata["doping"] = float(res.groups()[0]) # parsing gerneral metadata if metadata: @@ -241,12 +256,12 @@ def readStr(self, datastring): res = re.search(regexp, metadata, re.M) if res: self.metadata[res.groups()[0]] = float(res.groups()[1]) - metadata = metadata[res.end():] + metadata = metadata[res.end() :] else: break # read actual data - robs, Gobs, drobs, dGobs - inf_or_nan = re.compile('(?i)^[+-]?(NaN|Inf)\\b') + inf_or_nan = re.compile("(?i)^[+-]?(NaN|Inf)\\b") has_drobs = True has_dGobs = True # raise PDFDataFormatError if something goes wrong @@ -257,15 +272,13 @@ def readStr(self, datastring): self.robs.append(float(v[0])) self.Gobs.append(float(v[1])) # drobs is valid if all values are defined and positive - has_drobs = (has_drobs and - len(v) > 2 and not inf_or_nan.match(v[2])) + has_drobs = has_drobs and len(v) > 2 and not inf_or_nan.match(v[2]) if has_drobs: v2 = float(v[2]) has_drobs = v2 > 0.0 self.drobs.append(v2) # dGobs is valid if all values are defined and positive - has_dGobs = (has_dGobs and - len(v) > 3 and not inf_or_nan.match(v[3])) + has_dGobs = has_dGobs and len(v) > 3 and not inf_or_nan.match(v[3]) if has_dGobs: v3 = float(v[3]) has_dGobs = v3 > 0.0 @@ -278,11 +291,12 @@ def readStr(self, datastring): raise PDFDataFormatError(err) self.rmin = self.robs[0] self.rmax = self.robs[-1] - if not has_drobs: self.drobs = len(self.robs) * [0.0] - if not has_dGobs: self.dGobs = len(self.robs) * [0.0] + if not has_drobs: + self.drobs = len(self.robs) * [0.0] + if not has_dGobs: + self.dGobs = len(self.robs) * [0.0] return self - def write(self, filename): """Write experimental PDF data to a file. @@ -291,7 +305,7 @@ def write(self, filename): No return value. 
""" bytes = self.writeStr() - f = open(filename, 'w') + f = open(filename, "w") f.write(bytes) f.close() return @@ -303,38 +317,43 @@ def writeStr(self): """ lines = [] # write metadata - lines.extend([ - 'History written: ' + time.ctime(), - 'produced by ' + getuser(), - '##### PDFgui' ]) + lines.extend( + [ + "History written: " + time.ctime(), + "produced by " + getuser(), + "##### PDFgui", + ] + ) # stype - if self.stype == 'X': - lines.append('stype=X x-ray scattering') - elif self.stype == 'N': - lines.append('stype=N neutron scattering') + if self.stype == "X": + lines.append("stype=X x-ray scattering") + elif self.stype == "N": + lines.append("stype=N neutron scattering") # qmax if self.qmax == 0: - qmax_line = 'qmax=0 correction not applied' + qmax_line = "qmax=0 correction not applied" else: - qmax_line = 'qmax=%.2f' % self.qmax + qmax_line = "qmax=%.2f" % self.qmax lines.append(qmax_line) # qdamp - lines.append('qdamp=%g' % self.qdamp) + lines.append("qdamp=%g" % self.qdamp) # qbroad - lines.append('qbroad=%g' % self.qbroad) + lines.append("qbroad=%g" % self.qbroad) # dscale - lines.append('dscale=%g' % self.dscale) + lines.append("dscale=%g" % self.dscale) # metadata if len(self.metadata) > 0: - lines.append('# metadata') + lines.append("# metadata") for k, v in self.metadata.iteritems(): - lines.append( "%s=%s" % (k,v) ) + lines.append("%s=%s" % (k, v)) # write data: - lines.append('##### start data') - lines.append('#L r(A) G(r) d_r d_Gr') + lines.append("##### start data") + lines.append("#L r(A) G(r) d_r d_Gr") for i in range(len(self.robs)): - lines.append('%g %g %g %g' % \ - (self.robs[i], self.Gobs[i], self.drobs[i], self.dGobs[i]) ) + lines.append( + "%g %g %g %g" + % (self.robs[i], self.Gobs[i], self.drobs[i], self.dGobs[i]) + ) # that should be it datastring = "\n".join(lines) + "\n" return datastring @@ -351,30 +370,44 @@ def copy(self, other=None): other.clear() # some attributes can be assigned, e.g., robs, Gobs, drobs, dGobs are # constant so they can be shared between copies. - assign_attributes = ( 'robs', 'Gobs', 'drobs', 'dGobs', 'stype', - 'qmax', 'qdamp', 'qbroad', 'dscale', - 'rmin', 'rmax', 'filename' ) + assign_attributes = ( + "robs", + "Gobs", + "drobs", + "dGobs", + "stype", + "qmax", + "qdamp", + "qbroad", + "dscale", + "rmin", + "rmax", + "filename", + ) # for others we will assign a copy - copy_attributes = ( 'metadata', ) + copy_attributes = ("metadata",) for a in assign_attributes: setattr(other, a, getattr(self, a)) import copy + for a in copy_attributes: setattr(other, a, copy.deepcopy(getattr(self, a))) return other + # End of class PDFDataSet class PDFDataFormatError(Exception): - """Exception class marking failure to proccess PDF data string. 
- """ + """Exception class marking failure to proccess PDF data string.""" + pass # simple test code -if __name__ == '__main__': +if __name__ == "__main__": import sys + filename = sys.argv[1] dataset = PDFDataSet("test") dataset.read(filename) @@ -383,7 +416,7 @@ class PDFDataFormatError(Exception): print(k, "=", v) print("== data members ==") for k, v in dataset.__dict__.iteritems(): - if k in ('metadata', 'robs', 'Gobs', 'drobs', 'dGobs') or k[0] == "_": + if k in ("metadata", "robs", "Gobs", "drobs", "dGobs") or k[0] == "_": continue print(k, "=", v) print("== robs Gobs drobs dGobs ==") diff --git a/diffpy/srmise/peakextraction.py b/diffpy/srmise/peakextraction.py index 5dffb9f..8f1d404 100644 --- a/diffpy/srmise/peakextraction.py +++ b/diffpy/srmise/peakextraction.py @@ -58,12 +58,22 @@ def __init__(self, newvars=[]): Parameters newvars: Sequence of strings that represent additional extraction parameters.""" self.clear() - self.extractvars = dict.fromkeys(('effective_dy', 'rng', 'pf', 'initial_peaks', 'baseline', 'cres', 'error_method')) + self.extractvars = dict.fromkeys( + ( + "effective_dy", + "rng", + "pf", + "initial_peaks", + "baseline", + "cres", + "error_method", + ) + ) for k in newvars: if k not in self.extractvars: self.extractvars[k] = None else: - emsg = "Extraction variable %s conflicts with existing variable" %k + emsg = "Extraction variable %s conflicts with existing variable" % k raise ValueError(emsg) return @@ -104,7 +114,7 @@ def setdata(self, x, y, dx=None, dy=None): if len(self.x) != len(self.dx) or len(self.x) != len(self.dy): emsg = "Sequences dx and dy (if present) must have the same length as x" raise ValueError(emsg) - #self.defaultvars() + # self.defaultvars() return def setvars(self, quiet=False, **kwds): @@ -120,7 +130,8 @@ def setvars(self, quiet=False, **kwds): baseline: Baseline instance or BaselineFunction instance (use built-in estimation) error_method: ErrorEvaluator subclass instance used to compare models (default AIC) initial_peaks: Peaks instance. These peaks are present at the start of extraction. - rng: Sequence specifying the least and greatest x-values over which to extract peaks.""" + rng: Sequence specifying the least and greatest x-values over which to extract peaks. + """ for k, v in kwds.iteritems(): if k in self.extractvars: if quiet: @@ -159,24 +170,26 @@ def defaultvars(self, *args): Note that the default values of very important parameters like the uncertainty and clustering resolution are crude guesses at best. """ - if self.cres is None or 'cres' in args: - self.cres = 4*(self.x[-1] - self.x[0])/len(self.x) + if self.cres is None or "cres" in args: + self.cres = 4 * (self.x[-1] - self.x[0]) / len(self.x) - if self.effective_dy is None or 'effective_dy' in args: + if self.effective_dy is None or "effective_dy" in args: if np.all(self.dy > 0): # That is, all points positive uncertainty. self.effective_dy = self.dy else: # A terribly crude guess - self.effective_dy = .05*(np.max(self.y)-np.min(self.y))*np.ones(len(self.x)) + self.effective_dy = ( + 0.05 * (np.max(self.y) - np.min(self.y)) * np.ones(len(self.x)) + ) elif np.isscalar(self.effective_dy) and self.effective_dy > 0: - self.effective_dy = self.effective_dy*np.ones(len(self.x)) + self.effective_dy = self.effective_dy * np.ones(len(self.x)) if self.pf is None or "pf" in args: from diffpy.srmise.peaks import GaussianOverR # TODO: Make a more useful default. 
- self.pf = [GaussianOverR(self.x[-1]-self.x[0])] + self.pf = [GaussianOverR(self.x[-1] - self.x[0])] if self.rng is None or "rng" in args: self.rng = [self.x[0], self.x[-1]] @@ -192,37 +205,40 @@ def defaultvars(self, *args): s = self.getrangeslice() epars = self.baseline.estimate_parameters(self.x[s], self.y[s]) self.baseline = self.baseline.actualize(epars, "internal") - logger.info("Estimating baseline: %s" %self.baseline) + logger.info("Estimating baseline: %s" % self.baseline) except (NotImplementedError, SrMiseEstimationError): - logger.error("Could not estimate baseline from provided BaselineFunction, trying default values.") + logger.error( + "Could not estimate baseline from provided BaselineFunction, trying default values." + ) self.baseline = None if self.baseline is None or "baseline" in args: from diffpy.srmise.baselines import Polynomial - bl = Polynomial(degree = -1) + + bl = Polynomial(degree=-1) self.baseline = bl.actualize(np.array([]), "internal") if self.error_method is None or "error_method" in args: from diffpy.srmise.modelevaluators import AIC + self.error_method = AIC if self.initial_peaks is None or "initial_peaks" in args: self.initial_peaks = Peaks() - def __str__(self): """Return string summary of PeakExtraction.""" out = [] for k in self.extractvars: - out.append("%s: %s" %(k, getattr(self, k))) + out.append("%s: %s" % (k, getattr(self, k))) if self.extracted is not None: - out.append("Extraction type: %s" %self.extraction_type) + out.append("Extraction type: %s" % self.extraction_type) out.append("--- Extracted ---") out.append(str(self.extracted)) else: out.append("No extracted peaks exist.") - return '\n'.join(out)+'\n' + return "\n".join(out) + "\n" def plot(self, **kwds): """Convenience function to plot data and extracted peaks with matplotlib. @@ -242,10 +258,18 @@ def plot(self, **kwds): x = self.x[rangeslice] y = self.y[rangeslice] dy = self.dy[rangeslice] - mcluster = ModelCluster(self.initial_peaks, self.baseline, x, y, dy, None, self.error_method, self.pf) + mcluster = ModelCluster( + self.initial_peaks, + self.baseline, + x, + y, + dy, + None, + self.error_method, + self.pf, + ) plt.plot(*mcluster.plottable(kwds)) - def read(self, filename): """load PeakExtraction object from file @@ -254,12 +278,14 @@ def read(self, filename): returns self """ try: - self.readstr(open(filename,'rb').read()) + self.readstr(open(filename, "rb").read()) except SrMiseDataFormatError as err: logger.exception("") basename = os.path.basename(filename) - emsg = ("Could not open '%s' due to unsupported file format " + - "or corrupted data. [%s]") % (basename, err) + emsg = ( + "Could not open '%s' due to unsupported file format " + + "or corrupted data. 
[%s]" + ) % (basename, err) raise SrMiseFileError(emsg) return self @@ -288,120 +314,120 @@ def readstr(self, datastring): safebf = [] # find where the results section starts - res = re.search(r'^#+ Results\s*(?:#.*\s+)*', datastring, re.M) + res = re.search(r"^#+ Results\s*(?:#.*\s+)*", datastring, re.M) if res: - results = datastring[res.end():].strip() - header = datastring[:res.start()] + results = datastring[res.end() :].strip() + header = datastring[: res.start()] # find data section, and what information it contains - res = re.search(r'^#+ start data\s*(?:#.*\s+)*', header, re.M) + res = re.search(r"^#+ start data\s*(?:#.*\s+)*", header, re.M) if res: - start_data = header[res.end():].strip() - start_data_info = header[res.start():res.end()] - header = header[:res.start()] - res = re.search(r'^(#+L.*)$', start_data_info, re.M) + start_data = header[res.end() :].strip() + start_data_info = header[res.start() : res.end()] + header = header[: res.start()] + res = re.search(r"^(#+L.*)$", start_data_info, re.M) if res: - start_data_info = start_data_info[res.start():res.end()].strip() + start_data_info = start_data_info[res.start() : res.end()].strip() hasx = False hasy = False hasdx = False hasdy = False hasedy = False - res = re.search(r'\bx\b', start_data_info) + res = re.search(r"\bx\b", start_data_info) if res: hasx = True - res = re.search(r'\by\b', start_data_info) + res = re.search(r"\by\b", start_data_info) if res: hasy = True - res = re.search(r'\bdx\b', start_data_info) + res = re.search(r"\bdx\b", start_data_info) if res: hasdx = True - res = re.search(r'\bdy\b', start_data_info) + res = re.search(r"\bdy\b", start_data_info) if res: hasdy = True - res = re.search(r'\edy\b', start_data_info) + res = re.search(r"\edy\b", start_data_info) if res: hasedy = True - res = re.search(r'^#+ Metadata\s*(?:#.*\s+)*', header, re.M) + res = re.search(r"^#+ Metadata\s*(?:#.*\s+)*", header, re.M) if res: - metadata = header[res.end():].strip() - header = header[:res.start()] + metadata = header[res.end() :].strip() + header = header[: res.start()] - res = re.search(r'^#+ SrMiseMetadata\s*(?:#.*\s+)*', header, re.M) + res = re.search(r"^#+ SrMiseMetadata\s*(?:#.*\s+)*", header, re.M) if res: - srmisemetadata = header[res.end():].strip() - header = header[:res.start()] + srmisemetadata = header[res.end() :].strip() + header = header[: res.start()] - res = re.search(r'^#+ InitialPeaks.*$', header, re.M) + res = re.search(r"^#+ InitialPeaks.*$", header, re.M) if res: - initial_peaks = header[res.end():].strip() - header = header[:res.start()] + initial_peaks = header[res.end() :].strip() + header = header[: res.start()] - res = re.search(r'^#+ BaselineObject\s*(?:#.*\s+)*', header, re.M) + res = re.search(r"^#+ BaselineObject\s*(?:#.*\s+)*", header, re.M) if res: - baselineobject = header[res.end():].strip() - header = header[:res.start()] + baselineobject = header[res.end() :].strip() + header = header[: res.start()] - res = re.search(r'^#+ PeakFunctions.*$', header, re.M) + res = re.search(r"^#+ PeakFunctions.*$", header, re.M) if res: - peakfunctions = header[res.end():].strip() - header = header[:res.start()] + peakfunctions = header[res.end() :].strip() + header = header[: res.start()] - res = re.search(r'^#+ BaselineFunctions.*$', header, re.M) + res = re.search(r"^#+ BaselineFunctions.*$", header, re.M) if res: - baselinefunctions = header[res.end():].strip() - header = header[:res.start()] + baselinefunctions = header[res.end() :].strip() + header = header[: res.start()] ### 
Instantiating baseline functions - res = re.split(r'(?m)^#+ BaselineFunction \d+\s*(?:#.*\s+)*', baselinefunctions) + res = re.split(r"(?m)^#+ BaselineFunction \d+\s*(?:#.*\s+)*", baselinefunctions) for s in res[1:]: safebf.append(BaseFunction.factory(s, safebf)) ### Instantiating peak functions - res = re.split(r'(?m)^#+ PeakFunction \d+\s*(?:#.*\s+)*', peakfunctions) + res = re.split(r"(?m)^#+ PeakFunction \d+\s*(?:#.*\s+)*", peakfunctions) for s in res[1:]: safepf.append(BaseFunction.factory(s, safepf)) ### Instantiating Baseline object - if re.match(r'^None$', baselineobject): + if re.match(r"^None$", baselineobject): self.baseline = None - elif re.match(r'^\d+$', baselineobject): + elif re.match(r"^\d+$", baselineobject): self.baseline = safebf[int(baselineobject)] else: self.baseline = Baseline.factory(baselineobject, safebf) ### Instantiating initial peaks - if re.match(r'^None$', initial_peaks): + if re.match(r"^None$", initial_peaks): self.initial_peaks = None else: self.initial_peaks = Peaks() - res = re.split(r'(?m)^#+ InitialPeak\s*(?:#.*\s+)*', initial_peaks) + res = re.split(r"(?m)^#+ InitialPeak\s*(?:#.*\s+)*", initial_peaks) for s in res[1:]: self.initial_peaks.append(Peak.factory(s, safepf)) ### Instantiating srmise metatdata # pf - res = re.search(r'^pf=(.*)$', srmisemetadata, re.M) + res = re.search(r"^pf=(.*)$", srmisemetadata, re.M) self.pf = eval(res.groups()[0].strip()) if self.pf is not None: self.pf = [safepf[i] for i in self.pf] # cres - rx = { 'f' : r'[-+]?(\d+(\.\d*)?|\d*\.\d+)([eE][-+]?\d+)?' } + rx = {"f": r"[-+]?(\d+(\.\d*)?|\d*\.\d+)([eE][-+]?\d+)?"} regexp = r"\bcres *= *(%(f)s)\b" % rx res = re.search(regexp, srmisemetadata, re.I) self.cres = float(res.groups()[0]) # error_method - res = re.search(r'^ModelEvaluator=(.*)$', srmisemetadata, re.M) + res = re.search(r"^ModelEvaluator=(.*)$", srmisemetadata, re.M) __import__("diffpy.srmise.modelevaluators") module = sys.modules["diffpy.srmise.modelevaluators"] self.error_method = getattr(module, res.groups()[0].strip()) # range - res = re.search(r'^Range=(.*)$', srmisemetadata, re.M) + res = re.search(r"^Range=(.*)$", srmisemetadata, re.M) self.rng = eval(res.groups()[0].strip()) ### Instantiating other metadata @@ -440,7 +466,10 @@ def readstr(self, datastring): for line in start_data.split("\n"): l = line.split() if len(arrays) != len(l): - emsg = ("Number of value fields does not match that given by '%s'" %start_data_info) + emsg = ( + "Number of value fields does not match that given by '%s'" + % start_data_info + ) for a, v in zip(arrays, line.split()): a.append(float(v)) except (ValueError, IndexError) as err: @@ -456,15 +485,14 @@ def readstr(self, datastring): if hasedy: self.effective_dy = np.array(self.effective_dy) - ### Instantiating results - res = re.search(r'^#+ ModelCluster\s*(?:#.*\s+)*', results, re.M) + res = re.search(r"^#+ ModelCluster\s*(?:#.*\s+)*", results, re.M) if res: - mc = results[res.end():].strip() - results = results[:res.start()] + mc = results[res.end() :].strip() + results = results[: res.start()] # extraction type - res = re.search(r'^extraction_type=(.*)$', results, re.M) + res = re.search(r"^extraction_type=(.*)$", results, re.M) if res: self.extraction_type = eval(res.groups()[0].strip()) else: @@ -472,10 +500,12 @@ def readstr(self, datastring): raise SrMiseDataFormatError(emsg) # extracted - if re.match(r'^None$', mc): + if re.match(r"^None$", mc): self.extracted = None else: - self.extracted = ModelCluster.factory(mc, pfbaselist=safepf, blfbaselist=safebf) + 
self.extracted = ModelCluster.factory( + mc, pfbaselist=safepf, blfbaselist=safebf + ) def write(self, filename): """Write string representation of PeakExtraction instance to file. @@ -483,12 +513,11 @@ def write(self, filename): Parameters filename: the name of the file to write""" bytes = self.writestr() - f = open(filename, 'w') + f = open(filename, "w") f.write(bytes) f.close() return - def writestr(self): """Return string representation of PeakExtraction object.""" import time @@ -500,11 +529,14 @@ def writestr(self): lines = [] # Header - lines.extend([ - 'History written: ' + time.ctime(), - 'produced by ' + getuser(), - 'diffpy.srmise version %s' %__version__, - '##### PDF Peak Extraction' ]) + lines.extend( + [ + "History written: " + time.ctime(), + "produced by " + getuser(), + "diffpy.srmise version %s" % __version__, + "##### PDF Peak Extraction", + ] + ) # Generate list of PeakFunctions and BaselineFunctions # so I can refer to them by index when necessary. @@ -517,7 +549,7 @@ def writestr(self): if self.baseline is not None: if isinstance(self.baseline, BaseFunction): allbf.append(self.baseline) - else: # should be a ModelPart + else: # should be a ModelPart allbf.append(self.baseline.owner()) if self.extracted is not None: allpf.extend(self.extracted.peak_funcs) @@ -532,13 +564,13 @@ def writestr(self): # Indexed baseline functions lines.append("## BaselineFunctions") for i, bf in enumerate(safebf): - lines.append('# BaselineFunction %s' %i) + lines.append("# BaselineFunction %s" % i) lines.append(bf.writestr(safebf)) # Indexed peak functions lines.append("## PeakFunctions") for i, pf in enumerate(safepf): - lines.append('# PeakFunction %s' %i) + lines.append("# PeakFunction %s" % i) lines.append(pf.writestr(safepf)) # Baseline @@ -546,7 +578,7 @@ def writestr(self): if self.baseline is None: lines.append("None") elif self.baseline in safebf: - lines.append('%s' %repr(safebf.index(self.baseline))) + lines.append("%s" % repr(safebf.index(self.baseline))) else: lines.append(self.baseline.writestr(safebf)) @@ -556,34 +588,34 @@ def writestr(self): lines.append("None") else: for ip in self.initial_peaks: - lines.append('# InitialPeak') + lines.append("# InitialPeak") lines.append(ip.writestr(safepf)) - lines.append('# SrMiseMetadata') + lines.append("# SrMiseMetadata") # Extractable peak types if self.pf is None: lines.append("pf=None") else: - lines.append("pf=%s" %repr([safepf.index(p) for p in self.pf])) + lines.append("pf=%s" % repr([safepf.index(p) for p in self.pf])) # Clustering resolution - lines.append('cres=%g' %self.cres) + lines.append("cres=%g" % self.cres) # Model evaluator if self.error_method is None: - lines.append('ModelEvaluator=None') + lines.append("ModelEvaluator=None") else: - lines.append('ModelEvaluator=%s' %self.error_method.__name__) + lines.append("ModelEvaluator=%s" % self.error_method.__name__) # Extraction range - lines.append("Range=%s" %repr(self.rng)) + lines.append("Range=%s" % repr(self.rng)) # Everything not defined by PeakExtraction - lines.append('# Metadata') + lines.append("# Metadata") lines.append(self.writemetadata()) # Raw data used in extraction. 
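# (Illustrative aside, not part of this patch.)  The data block written
# below is plain whitespace-delimited columns: one "#L" label line naming
# the columns, then one "%g"-formatted row per point.  readstr() later
# recovers which columns are present from that same label line.
import numpy as np

x = np.array([1.0, 2.0, 3.0])
y = np.array([0.5, 0.7, 0.6])
dy = np.array([0.01, 0.01, 0.02])
lines = ["##### start data", "#L x y dy"]
for xi, yi, dyi in zip(x, y, dy):
    lines.append("%g %g %g" % (xi, yi, dyi))
datastring = "\n".join(lines) + "\n"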
- lines.append('##### start data') - line = ['#L'] + lines.append("##### start data") + line = ["#L"] numlines = 0 if self.x is not None: line.append("x") @@ -604,29 +636,28 @@ def writestr(self): for i in range(numlines): line = [] if self.x is not None: - line.append("%g" %self.x[i]) + line.append("%g" % self.x[i]) if self.y is not None: - line.append("%g" %self.y[i]) + line.append("%g" % self.y[i]) if self.dx is not None: - line.append("%g" %self.dx[i]) + line.append("%g" % self.dx[i]) if self.dy is not None: - line.append("%g" %self.dy[i]) + line.append("%g" % self.dy[i]) if self.effective_dy is not None: - line.append("%g" %self.effective_dy[i]) + line.append("%g" % self.effective_dy[i]) lines.append(" ".join(line)) - ### Calculated members - lines.append('##### Results') - lines.append('extraction_type=%s' %repr(self.extraction_type)) + lines.append("##### Results") + lines.append("extraction_type=%s" % repr(self.extraction_type)) lines.append("### ModelCluster") if self.extracted is None: - lines.append('None') + lines.append("None") else: lines.append(self.extracted.writestr(pfbaselist=safepf, blfbaselist=safebf)) - datastring = "\n".join(lines)+"\n" + datastring = "\n".join(lines) + "\n" return datastring def writemetadata(self): @@ -647,7 +678,7 @@ def getrangeslice(self): while self.x[low_idx] < max(self.x[0], self.rng[0]): low_idx += 1 hi_idx = len(self.x) - while self.x[hi_idx-1] > min(self.x[-1], self.rng[1]): + while self.x[hi_idx - 1] > min(self.x[-1], self.rng[1]): hi_idx -= 1 return slice(low_idx, hi_idx) @@ -675,18 +706,22 @@ def estimate_peak(self, x, add=True): # Determine clusters using initial_peaks and pre-defined or estimated baseline rangeslice = self.getrangeslice() x1 = self.x[rangeslice] - y1 = self.y[rangeslice] - self.baseline.value(x1) - self.initial_peaks.value(x1) + y1 = ( + self.y[rangeslice] + - self.baseline.value(x1) + - self.initial_peaks.value(x1) + ) dy = self.effective_dy[rangeslice] if x < x1[0] or x > x1[-1]: - emsg = "Argument x=%s outside allowed range (%s, %s)." %(x, x1[0], x1[-1]) + emsg = "Argument x=%s outside allowed range (%s, %s)." % (x, x1[0], x1[-1]) raise ValueError(emsg) # Object performing clustering on data. Note that DataClusters # provides an iterator that clusters the next point and returns # itself. Thus, dclusters and step (below) refer to the same object. - dclusters = DataClusters(x1, y1, self.cres) # Cluster with baseline removed + dclusters = DataClusters(x1, y1, self.cres) # Cluster with baseline removed dclusters.makeclusters() cidx = dclusters.find_nearest_cluster2(x)[0] cslice = dclusters.cut(cidx) @@ -695,15 +730,17 @@ def estimate_peak(self, x, add=True): y1 = y1[cslice] dy = dy[cslice] - mcluster = ModelCluster(None, None, x1, y1, dy, None, self.error_method, self.pf) + mcluster = ModelCluster( + None, None, x1, y1, dy, None, self.error_method, self.pf + ) mcluster.fit() if len(mcluster.model) > 0: if add: - logger.info("Adding peak: %s" %mcluster.model[0]) + logger.info("Adding peak: %s" % mcluster.model[0]) self.add_peaks(mcluster.model) else: - logger.info("Found peak: %s" %mcluster.model[0]) + logger.info("Found peak: %s" % mcluster.model[0]) return mcluster.model[0] else: logger.info("No peaks found.") @@ -725,12 +762,12 @@ def add_peaks(self, peaks): def extract_single(self, recursion_depth=1): """Find ModelCluster with peaks extracted from data. Return ModelCovariance instance at top level. - Every extracted peak is one of the peak functions supplied. 
All - comparisons of different peak models are performed with the class - specified by error_method. + Every extracted peak is one of the peak functions supplied. All + comparisons of different peak models are performed with the class + specified by error_method. - Parameters - recursion_depth: (1) Tracks recursion with extract_single.""" + Parameters + recursion_depth: (1) Tracks recursion with extract_single.""" self.clearcalc() tracer = srmiselog.tracer tracer.pushc() @@ -755,13 +792,17 @@ def extract_single(self, recursion_depth=1): # provides an iterator that clusters the next point and returns # itself. Thus, dclusters and step (below) refer to the same object. - dclusters = DataClusters(x, y, self.cres) # Cluster with baseline removed + dclusters = DataClusters(x, y, self.cres) # Cluster with baseline removed # The data for model clusters includes the baseline y = self.y[rangeslice] - ip.value(x) # List of ModelClusters containing extracted peaks. - mclusters = [ModelCluster(None, bl, x, y, dy, dclusters.cut(0), self.error_method, self.pf)] + mclusters = [ + ModelCluster( + None, bl, x, y, dy, dclusters.cut(0), self.error_method, self.pf + ) + ] # The minimum number of points required to make a valid fit, as # determined by the peak functions and error method used. This is a @@ -777,28 +818,35 @@ def extract_single(self, recursion_depth=1): stepcounter += 1 msg = "\n\n------ Recursion: %s Step: %s Cluster: %s %s ------" - logger.debug(msg, - recursion_depth, - stepcounter, - step.lastcluster_idx, - step.clusters[step.lastcluster_idx] - ) + logger.debug( + msg, + recursion_depth, + stepcounter, + step.lastcluster_idx, + step.clusters[step.lastcluster_idx], + ) # Update mclusters if len(step.clusters) > len(mclusters): # Add a new cluster - mclusters.insert(step.lastcluster_idx, - ModelCluster(None, - bl, - x, - y, - dy, - step.cut(step.lastcluster_idx), - self.error_method, - self.pf)) + mclusters.insert( + step.lastcluster_idx, + ModelCluster( + None, + bl, + x, + y, + dy, + step.cut(step.lastcluster_idx), + self.error_method, + self.pf, + ), + ) else: # Update an existing cluster - mclusters[step.lastcluster_idx].change_slice(step.cut(step.lastcluster_idx)) + mclusters[step.lastcluster_idx].change_slice( + step.cut(step.lastcluster_idx) + ) # Find newly adjacent clusters adjacent = step.find_adjacent_clusters().ravel() @@ -813,7 +861,7 @@ def extract_single(self, recursion_depth=1): assert len(adjacent) <= 3 ### Update cluster fits ### - #1. Refit clusters adjacent to at least one other cluster. + # 1. Refit clusters adjacent to at least one other cluster. for a in adjacent: mclusters[a].fit(justify=True) @@ -842,7 +890,7 @@ def extract_single(self, recursion_depth=1): # enlarged cluster ("new_cluster") or an intermediate cluster # ("adj_cluster"). 
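# (Illustrative aside, not part of this patch.)  The boundary logic below
# leans on numpy's searchsorted: with the fitted peak positions kept
# sorted, pivot = peak_pos.searchsorted(border_x) counts the peaks left of
# the border, so indices pivot-1 and pivot pick out the nearest peak on
# each side (when they exist).
import numpy as np

peak_pos = np.array([1.0, 2.5, 4.0, 6.5])
border_x = 3.1
pivot = peak_pos.searchsorted(border_x)   # -> 2
near = [i for i in (pivot - 1, pivot) if 0 <= i < len(peak_pos)]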
- if step.lastpoint_idx == 0 or step.lastpoint_idx == len(step.x)-1: + if step.lastpoint_idx == 0 or step.lastpoint_idx == len(step.x) - 1: logger.debug("Boundary full: %s", step.lastpoint_idx) full_cluster = ModelCluster(mclusters[step.lastcluster_idx]) full_cluster.fit(True) @@ -853,7 +901,7 @@ def extract_single(self, recursion_depth=1): # Determine neighborhood appropriate for fitting (no larger than combined clusters) if len(full_cluster.model) > 0: - peak_pos = np.array([p['position'] for p in full_cluster.model]) + peak_pos = np.array([p["position"] for p in full_cluster.model]) pivot = peak_pos.searchsorted(border_x) else: peak_pos = np.array([]) @@ -871,25 +919,27 @@ def extract_single(self, recursion_depth=1): elif pivot == 1: # One peak left left_data = full_cluster.slice.indices(len(x))[0] - near_peaks = np.append(near_peaks, pivot-1) + near_peaks = np.append(near_peaks, pivot - 1) else: # left_data -> one more peak to the left - left_data = max(0, x.searchsorted(peak_pos[pivot-2])-1) - near_peaks = np.append(near_peaks, pivot-1) + left_data = max(0, x.searchsorted(peak_pos[pivot - 2]) - 1) + near_peaks = np.append(near_peaks, pivot - 1) if pivot == len(peak_pos): # No peaks right of border_x! - right_data = full_cluster.slice.indices(len(x))[1]-1 - elif pivot == len(peak_pos)-1: + right_data = full_cluster.slice.indices(len(x))[1] - 1 + elif pivot == len(peak_pos) - 1: # One peak right - right_data = full_cluster.slice.indices(len(x))[1]-1 + right_data = full_cluster.slice.indices(len(x))[1] - 1 near_peaks = np.append(near_peaks, pivot) else: # right_data -> one more peak to the right - right_data = min(len(x), x.searchsorted(peak_pos[pivot+1])+1) + right_data = min(len(x), x.searchsorted(peak_pos[pivot + 1]) + 1) near_peaks = np.append(near_peaks, pivot) - other_peaks = np.concatenate([np.arange(0, pivot-1), np.arange(pivot+1, len(peak_pos))]) + other_peaks = np.concatenate( + [np.arange(0, pivot - 1), np.arange(pivot + 1, len(peak_pos))] + ) # Go from indices to lists of peaks. near_peaks = Peaks([full_cluster.model[i] for i in near_peaks]) @@ -900,31 +950,63 @@ def extract_single(self, recursion_depth=1): # The adjusted error is passed unchanged. This may introduce # a few more peaks than is justified, but they can be pruned # with the correct statistics at the top level of recursion. 
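# (Illustrative aside, not part of this patch.)  The adjusted fit set up
# below sees only the peaks near the border: the summed contribution of
# every other peak is subtracted from the data first, so the reduced
# problem contains just the nearby peaks plus noise.  A toy version with a
# hypothetical Gaussian profile:
import numpy as np

def gauss(r, pos, width, area):
    return area * np.exp(-0.5 * ((r - pos) / width) ** 2) / (width * np.sqrt(2 * np.pi))

r = np.linspace(0.0, 10.0, 201)
y = gauss(r, 3.0, 0.2, 1.0) + gauss(r, 7.0, 0.2, 1.0)
other = gauss(r, 7.0, 0.2, 1.0)   # peaks far from the border
adj_y = y - other                 # residual data seen by the near-peak fit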
- adj_slice = slice(left_data, right_data+1) + adj_slice = slice(left_data, right_data + 1) adj_x = x[adj_slice] - adj_y = y[adj_slice]-other_peaks.value(adj_x) + adj_y = y[adj_slice] - other_peaks.value(adj_x) adj_error = dy[adj_slice] - adj_cluster = ModelCluster(near_peaks, bl, adj_x, adj_y, adj_error, slice(len(adj_x)), self.error_method, self.pf) + adj_cluster = ModelCluster( + near_peaks, + bl, + adj_x, + adj_y, + adj_error, + slice(len(adj_x)), + self.error_method, + self.pf, + ) # Recursively cluster/fit the residual rec_r = adj_x - rec_y = adj_y-near_peaks.value(rec_r) + rec_y = adj_y - near_peaks.value(rec_r) rec_error = adj_error # Quick check to see if there is anything to find min_npars = min([p.npars for p in self.pf]) - checkrec = ModelCluster(None, None, rec_r, rec_y, rec_error, None, self.error_method, self.pf) - recurse = len(near_peaks) > 0 and checkrec.quality().growth_justified(checkrec, min_npars) + checkrec = ModelCluster( + None, + None, + rec_r, + rec_y, + rec_error, + None, + self.error_method, + self.pf, + ) + recurse = len(near_peaks) > 0 and checkrec.quality().growth_justified( + checkrec, min_npars + ) if recurse and recursion_depth < 3: - logger.info("\n*********STARTING RECURSION level %s (full boundary)************" %(recursion_depth+1)) + logger.info( + "\n*********STARTING RECURSION level %s (full boundary)************" + % (recursion_depth + 1) + ) rec_search = PeakExtraction() rec_search.setdata(rec_r, rec_y, None, rec_error) - rec_search.setvars(quiet=True, baseline=bl, cres=self.cres, pf=self.pf, error_method=self.error_method) - rec_search.extract_single(recursion_depth+1) + rec_search.setvars( + quiet=True, + baseline=bl, + cres=self.cres, + pf=self.pf, + error_method=self.error_method, + ) + rec_search.extract_single(recursion_depth + 1) rec = rec_search.extracted - logger.info("*********ENDING RECURSION level %s (full boundary) ************\n" %(recursion_depth+1)) + logger.info( + "*********ENDING RECURSION level %s (full boundary) ************\n" + % (recursion_depth + 1) + ) # Incorporate best peaks from recursive search. adj_cluster.augment(rec) @@ -934,15 +1016,17 @@ def extract_single(self, recursion_depth=1): full_cluster.replacepeaks(adj_cluster.model) full_cluster.fit(True) - msg = ["---Result of full boundary---", - "Original cluster:", - "%s", - "Final cluster:", - "%s", - "---End of combining clusters---"] - logger.debug("\n".join(msg), - mclusters[step.lastcluster_idx], - full_cluster) + msg = [ + "---Result of full boundary---", + "Original cluster:", + "%s", + "Final cluster:", + "%s", + "---End of combining clusters---", + ] + logger.debug( + "\n".join(msg), mclusters[step.lastcluster_idx], full_cluster + ) mclusters[step.lastcluster_idx] = full_cluster ### End update cluster fits ### @@ -954,21 +1038,22 @@ def extract_single(self, recursion_depth=1): msg = ["Current model"] msg.extend(["%s" for m in mclusters]) - logger.debug("\n".join(msg), - *[m.model for m in mclusters]) + logger.debug("\n".join(msg), *[m.model for m in mclusters]) - cleft = step.clusters[idx-1] + cleft = step.clusters[idx - 1] cright = step.clusters[idx] - new_cluster = ModelCluster.join_adjacent(mclusters[idx-1], mclusters[idx]) + new_cluster = ModelCluster.join_adjacent( + mclusters[idx - 1], mclusters[idx] + ) # Estimate coordinate where clusters combine. 
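The growth_justified test above acts as a gate: a new level of extraction is started only when the residual plausibly supports at least one more peak of min_npars parameters, and never beyond recursion depth 3. A schematic of that guarded recursion; worth_recursing and next_residual are stand-ins, not SrMise API:

    def worth_recursing(residual):
        # Stand-in for checkrec.quality().growth_justified(checkrec, min_npars).
        return sum(abs(v) for v in residual) > 1.0

    def next_residual(residual):
        # Stand-in: the real code subtracts the newly fit peaks from the data.
        return [v / 2.0 for v in residual]

    def extract(residual, depth=1, max_depth=3):
        """Depth-limited search mirroring 'recurse and recursion_depth < 3'."""
        found = []
        if depth < max_depth and worth_recursing(residual):
            found.append("level %i" % (depth + 1))
            found += extract(next_residual(residual), depth + 1)
        return found

    print(extract([4.0, -3.0]))  # ['level 2', 'level 3'], then the gate closes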
- border_x = .5*(x[cleft[1]]+x[cright[0]]) - border_y = .5*(y[cleft[1]]+y[cright[0]]) + border_x = 0.5 * (x[cleft[1]] + x[cright[0]]) + border_y = 0.5 * (y[cleft[1]] + y[cright[0]]) # Determine neighborhood appropriate for fitting (no larger than combined clusters) if len(new_cluster.model) > 0: - peak_pos = np.array([p['position'] for p in new_cluster.model]) + peak_pos = np.array([p["position"] for p in new_cluster.model]) pivot = peak_pos.searchsorted(border_x) else: peak_pos = np.array([]) @@ -982,29 +1067,31 @@ def extract_single(self, recursion_depth=1): # interpeak range goes from peak to peak of next nearest peaks, although their contributions to the data are still removed. if pivot == 0: # No peaks left of border_x! - left_data=new_cluster.slice.indices(len(x))[0] + left_data = new_cluster.slice.indices(len(x))[0] elif pivot == 1: # One peak left left_data = new_cluster.slice.indices(len(x))[0] - near_peaks = np.append(near_peaks, pivot-1) + near_peaks = np.append(near_peaks, pivot - 1) else: # left_data -> one more peak to the left - left_data = max(0,x.searchsorted(peak_pos[pivot-2])-1) - near_peaks = np.append(near_peaks, pivot-1) + left_data = max(0, x.searchsorted(peak_pos[pivot - 2]) - 1) + near_peaks = np.append(near_peaks, pivot - 1) if pivot == len(peak_pos): # No peaks right of border_x! - right_data = new_cluster.slice.indices(len(x))[1]-1 - elif pivot == len(peak_pos)-1: + right_data = new_cluster.slice.indices(len(x))[1] - 1 + elif pivot == len(peak_pos) - 1: # One peak right - right_data = new_cluster.slice.indices(len(x))[1]-1 + right_data = new_cluster.slice.indices(len(x))[1] - 1 near_peaks = np.append(near_peaks, pivot) else: # right_data -> one more peak to the right - right_data = min(len(x), x.searchsorted(peak_pos[pivot+1])+1) + right_data = min(len(x), x.searchsorted(peak_pos[pivot + 1]) + 1) near_peaks = np.append(near_peaks, pivot) - other_peaks = np.concatenate([np.arange(0, pivot-1), np.arange(pivot+1, len(peak_pos))]) + other_peaks = np.concatenate( + [np.arange(0, pivot - 1), np.arange(pivot + 1, len(peak_pos))] + ) # Go from indices to lists of peaks. near_peaks = Peaks([new_cluster.model[i] for i in near_peaks]) @@ -1015,17 +1102,35 @@ def extract_single(self, recursion_depth=1): # The adjusted error is passed unchanged. This may introduce # a few more peaks than is justified, but they can be pruned # with the correct statistics at the top level of recursion. - adj_slice = slice(left_data, right_data+1) + adj_slice = slice(left_data, right_data + 1) adj_x = x[adj_slice] - adj_y = y[adj_slice]-other_peaks.value(adj_x) + adj_y = y[adj_slice] - other_peaks.value(adj_x) adj_error = dy[adj_slice] #### Perform recursion on a version that is scaled at the # border, as well as on that is simply fit beforehand. In # many cases these lead to nearly identical results, but # occasionally one works much better than the other. - adj_cluster1 = ModelCluster(near_peaks.copy(), bl, adj_x, adj_y, adj_error, slice(len(adj_x)), self.error_method, self.pf) - adj_cluster2 = ModelCluster(near_peaks.copy(), bl, adj_x, adj_y, adj_error, slice(len(adj_x)), self.error_method, self.pf) + adj_cluster1 = ModelCluster( + near_peaks.copy(), + bl, + adj_x, + adj_y, + adj_error, + slice(len(adj_x)), + self.error_method, + self.pf, + ) + adj_cluster2 = ModelCluster( + near_peaks.copy(), + bl, + adj_x, + adj_y, + adj_error, + slice(len(adj_x)), + self.error_method, + self.pf, + ) # Adjust cluster at border if there is at least one peak on # either side. 
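adj_cluster1 and adj_cluster2 above are two copies of the same neighborhood fit under different strategies, adjusted at the border versus simply prefit, presumably so the better of the two can be kept once both have been refined. A generic sketch of that fit-both-and-keep-best pattern; quality() here is a stand-in score (lower is better), not the SrMise error method:

    import numpy as np

    def quality(model, x, y):
        # Stand-in chi-square-like score.
        return float(np.sum((y - model(x)) ** 2))

    x = np.linspace(0.0, 1.0, 50)
    y = 2.0 * x + 0.1  # noiseless "data" for the example

    def variant_a(x):  # e.g. the cluster adjusted at the border
        return 2.0 * x

    def variant_b(x):  # e.g. the prefit cluster
        return 2.0 * x + 0.1

    best = min((variant_a, variant_b), key=lambda m: quality(m, x, y))
    assert best is variant_b  # keep whichever variant explains the data better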
@@ -1034,23 +1139,44 @@ def extract_single(self, recursion_depth=1): # Recursively cluster/fit the residual rec_r1 = adj_x - #rec_y1 = adj_y - near_peaks.value(rec_r1) + # rec_y1 = adj_y - near_peaks.value(rec_r1) rec_y1 = adj_y - adj_cluster1.model.value(rec_r1) rec_error1 = adj_error # Quick check to see if there is anything to find min_npars = min([p.npars for p in self.pf]) - checkrec = ModelCluster(None, None, rec_r1, rec_y1, rec_error1, None, self.error_method, self.pf) + checkrec = ModelCluster( + None, + None, + rec_r1, + rec_y1, + rec_error1, + None, + self.error_method, + self.pf, + ) recurse1 = checkrec.quality().growth_justified(checkrec, min_npars) if recurse1 and recursion_depth < 3: - logger.info("\n*********STARTING RECURSION level %s (reduce at border)************" %(recursion_depth+1)) + logger.info( + "\n*********STARTING RECURSION level %s (reduce at border)************" + % (recursion_depth + 1) + ) rec_search1 = PeakExtraction() rec_search1.setdata(rec_r1, rec_y1, None, rec_error1) - rec_search1.setvars(quiet=True, baseline=bl, cres=self.cres, pf=self.pf, error_method=self.error_method) - rec_search1.extract_single(recursion_depth+1) + rec_search1.setvars( + quiet=True, + baseline=bl, + cres=self.cres, + pf=self.pf, + error_method=self.error_method, + ) + rec_search1.extract_single(recursion_depth + 1) rec1 = rec_search1.extracted - logger.info("*********ENDING RECURSION level %s (reduce at border) ************\n" %(recursion_depth+1)) + logger.info( + "*********ENDING RECURSION level %s (reduce at border) ************\n" + % (recursion_depth + 1) + ) # Incorporate best peaks from recursive search. adj_cluster1.augment(rec1) @@ -1060,23 +1186,46 @@ def extract_single(self, recursion_depth=1): # Recursively cluster/fit the residual rec_r2 = adj_x - #rec_y2 = adj_y - near_peaks.value(rec_r2) + # rec_y2 = adj_y - near_peaks.value(rec_r2) rec_y2 = adj_y - adj_cluster2.model.value(rec_r2) rec_error2 = adj_error # Quick check to see if there is anything to find min_npars = min([p.npars for p in self.pf]) - checkrec = ModelCluster(None, None, rec_r2, rec_y2, rec_error2, None, self.error_method, self.pf) - recurse2 = len(near_peaks) > 0 and checkrec.quality().growth_justified(checkrec, min_npars) + checkrec = ModelCluster( + None, + None, + rec_r2, + rec_y2, + rec_error2, + None, + self.error_method, + self.pf, + ) + recurse2 = len(near_peaks) > 0 and checkrec.quality().growth_justified( + checkrec, min_npars + ) if recurse2 and recursion_depth < 3: - logger.info("\n*********STARTING RECURSION level %s (prefit)************" %(recursion_depth+1)) + logger.info( + "\n*********STARTING RECURSION level %s (prefit)************" + % (recursion_depth + 1) + ) rec_search2 = PeakExtraction() rec_search2.setdata(rec_r2, rec_y2, None, rec_error2) - rec_search2.setvars(quiet=True, baseline=bl, cres=self.cres, pf=self.pf, error_method=self.error_method) - rec_search2.extract_single(recursion_depth+1) + rec_search2.setvars( + quiet=True, + baseline=bl, + cres=self.cres, + pf=self.pf, + error_method=self.error_method, + ) + rec_search2.extract_single(recursion_depth + 1) rec2 = rec_search2.extracted - logger.info("*********ENDING RECURSION level %s (prefit) ************\n" %(recursion_depth+1)) + logger.info( + "*********ENDING RECURSION level %s (prefit) ************\n" + % (recursion_depth + 1) + ) # Incorporate best peaks from recursive search. 
adj_cluster2.augment(rec2) @@ -1095,22 +1244,22 @@ def extract_single(self, recursion_depth=1): new_cluster.fit(True) - - msg = ["---Result of combining clusters---", - "First cluster:", - "%s", - "Second cluster:", - "%s", - "Resulting cluster:", - "%s", - "---End of combining clusters---"] - - logger.debug("\n".join(msg), - mclusters[idx-1], - mclusters[idx], - new_cluster) - - mclusters[idx-1] = new_cluster + msg = [ + "---Result of combining clusters---", + "First cluster:", + "%s", + "Second cluster:", + "%s", + "Resulting cluster:", + "%s", + "---End of combining clusters---", + ] + + logger.debug( + "\n".join(msg), mclusters[idx - 1], mclusters[idx], new_cluster + ) + + mclusters[idx - 1] = new_cluster del mclusters[idx] ### End combine adjacent clusters loop ### @@ -1121,7 +1270,6 @@ def extract_single(self, recursion_depth=1): tracer.emit(*mclusters) - ### End main extraction loop ### ################################ @@ -1166,13 +1314,20 @@ def fit_single(self): dy = self.effective_dy[rngslice] # Set up ModelCluster - ext = ModelCluster(self.initial_peaks, self.baseline, x, y, dy, None, - self.error_method, self.pf) + ext = ModelCluster( + self.initial_peaks, + self.baseline, + x, + y, + dy, + None, + self.error_method, + self.pf, + ) # Fit model with baseline and calculate covariance matrix cov = ModelCovariance() - ext.fit(fitbaseline=True, estimate=False, cov=cov, - cov_format="default_output") + ext.fit(fitbaseline=True, estimate=False, cov=cov, cov_format="default_output") # Update calculated instance variables self.extraction_type = "fit_single" @@ -1180,11 +1335,12 @@ def fit_single(self): return cov -#end PeakExtraction class + +# end PeakExtraction class # simple test code -if __name__ == '__main__': +if __name__ == "__main__": from numpy.random import randn @@ -1195,13 +1351,13 @@ def fit_single(self): srmiselog.setlevel("info") srmiselog.liveplotting(False) - pf = GaussianOverR(.7) - res = .01 + pf = GaussianOverR(0.7) + res = 0.01 - pars = [[3, .2, 10], [3.5, .2, 10]] + pars = [[3, 0.2, 10], [3.5, 0.2, 10]] ideal_peaks = Peaks([pf.actualize(p, "pwa") for p in pars]) - r = np.arange(2,4,res) + r = np.arange(2, 4, res) y = ideal_peaks.value(r) + randn(len(r)) err = np.ones(len(r)) @@ -1209,7 +1365,7 @@ def fit_single(self): te = PeakExtraction() te.setdata(r, y, None, err) - te.setvars(rng=[1.51,10.], pf=[pf], cres=.1, effective_dy = 1.5*err) + te.setvars(rng=[1.51, 10.0], pf=[pf], cres=0.1, effective_dy=1.5 * err) te.extract_single() print("--- Actual Peak parameters ---") diff --git a/diffpy/srmise/peakstability.py b/diffpy/srmise/peakstability.py index 6ee38dd..fcf5e31 100644 --- a/diffpy/srmise/peakstability.py +++ b/diffpy/srmise/peakstability.py @@ -27,7 +27,7 @@ class PeakStability: """Utility to test robustness of peaks. 
results: [error scalar, model, bl, dr] - ppe: a PDFPeakExtraction instance """ + ppe: a PDFPeakExtraction instance""" def __init__(self): self.results = [] @@ -39,11 +39,11 @@ def setppe(self, ppe): def load(self, filename): try: - import cPickle as pickle + import cPickle as pickle except: - import pickle + import pickle - in_s = open(filename, 'rb') + in_s = open(filename, "rb") try: (self.results, ppestr) = pickle.load(in_s) self.ppe = PDFPeakExtraction() @@ -67,10 +67,10 @@ def load(self, filename): def save(self, filename): try: - import cPickle as pickle + import cPickle as pickle except: - import pickle - out_s = open(filename, 'wb') + import pickle + out_s = open(filename, "wb") try: # Write to the stream outstr = self.ppe.writestr() @@ -82,18 +82,23 @@ def save(self, filename): if r[2] is None: bldict = None else: - bldict = {"pars":r[2].pars, "free":r[2].free, "removable":r[2].removable, "static_owner":r[2].static_owner} + bldict = { + "pars": r[2].pars, + "free": r[2].free, + "removable": r[2].removable, + "static_owner": r[2].static_owner, + } results2.append([r[0], r[1], bldict, r[3]]) pickle.dump([results2, outstr], out_s) finally: out_s.close() - def plotseries(self, style='o', **kwds): + def plotseries(self, style="o", **kwds): plt.figure() plt.ioff() for e, r, bl, dr in self.results: peakpos = [p["position"] for p in r] - es = [e]*len(peakpos) + es = [e] * len(peakpos) plt.plot(peakpos, es, style, **kwds) plt.ion() plt.draw() @@ -103,19 +108,32 @@ def plot(self, **kwds): plt.clf() plt.plot(*self.ppe.extracted.plottable(), **kwds) q = self.ppe.extracted.quality() - plt.suptitle("[%i/%i]\n" - "Uncertainty: %6.3f. Peaks: %i.\n" - "Quality: %6.3f. Chi-square: %6.3f" - %(self.current+1, len(self.results), self.ppe.effective_dy[0], len(self.ppe.extracted.model), q.stat, q.chisq)) + plt.suptitle( + "[%i/%i]\n" + "Uncertainty: %6.3f. Peaks: %i.\n" + "Quality: %6.3f. 
Chi-square: %6.3f" + % ( + self.current + 1, + len(self.results), + self.ppe.effective_dy[0], + len(self.ppe.extracted.model), + q.stat, + q.chisq, + ) + ) def setcurrent(self, idx): """Make the idxth model the active one.""" self.current = idx if idx is not None: result = self.results[idx] - self.ppe.setvars(quiet=True, effective_dy=result[0]*np.ones(len(self.ppe.x))) + self.ppe.setvars( + quiet=True, effective_dy=result[0] * np.ones(len(self.ppe.x)) + ) (r, y, dr, dy) = self.ppe.resampledata(result[3]) - self.ppe.extracted = ModelCluster(result[1], result[2], r, y, dy, None, self.ppe.error_method, self.ppe.pf) + self.ppe.extracted = ModelCluster( + result[1], result[2], r, y, dy, None, self.ppe.error_method, self.ppe.pf + ) else: self.ppe.clearcalc() @@ -153,18 +171,22 @@ def run(self, err, savecovs=False): self.results = [] covs = [] for i, e in enumerate(err): - print("---- Running for uncertainty %s (%i/%i) ----" %(e, i, len(err))) + print("---- Running for uncertainty %s (%i/%i) ----" % (e, i, len(err))) self.ppe.clearcalc() self.ppe.setvars(effective_dy=e) if savecovs: covs.append(self.ppe.extract()) else: self.ppe.extract() - dr = (self.ppe.extracted.r_cluster[-1]-self.ppe.extracted.r_cluster[0])/(len(self.ppe.extracted.r_cluster)-1) - self.results.append([e, self.ppe.extracted.model, self.ppe.extracted.baseline, dr]) + dr = ( + self.ppe.extracted.r_cluster[-1] - self.ppe.extracted.r_cluster[0] + ) / (len(self.ppe.extracted.r_cluster) - 1) + self.results.append( + [e, self.ppe.extracted.model, self.ppe.extracted.baseline, dr] + ) for e, r, bl, dr in self.results: - print("---- Results for uncertainty %s ----" %e) + print("---- Results for uncertainty %s ----" % e) print(r) return covs diff --git a/diffpy/srmise/srmiselog.py b/diffpy/srmise/srmiselog.py index 1c5f5f1..d971b73 100644 --- a/diffpy/srmise/srmiselog.py +++ b/diffpy/srmise/srmiselog.py @@ -48,11 +48,13 @@ defaultformat = "%(message)s" defaultlevel = logging.INFO -LEVELS = {'debug': logging.DEBUG, - 'info': logging.INFO, - 'warning': logging.WARNING, - 'error': logging.ERROR, - 'critical': logging.CRITICAL} +LEVELS = { + "debug": logging.DEBUG, + "info": logging.INFO, + "warning": logging.WARNING, + "error": logging.ERROR, + "critical": logging.CRITICAL, +} ### Set up logging to stdout ### logger = logging.getLogger("diffpy.srmise") @@ -72,13 +74,15 @@ liveplots = False wait = False + def addfilelog(filename, level=defaultlevel, format=defaultformat): """Log output from diffpy.srmise in specified file. Parameters filename: Name of file to receiving output level: The logging level - format: A string defining format of output messages conforming to logging package.""" + format: A string defining format of output messages conforming to logging package. + """ global fh fh = logging.FileHandler(filename) fh.setLevel(level) @@ -86,6 +90,7 @@ def addfilelog(filename, level=defaultlevel, format=defaultformat): fh.setFormatter(formatter) logger.addHandler(fh) + def setfilelevel(level): """Set level of file logger. @@ -101,6 +106,7 @@ def setfilelevel(level): emsg = "File handler does not exist, cannot set its level." raise SrMiseLogError(emsg) + def setlevel(level): """Set level of default (stdout) logger. @@ -112,6 +118,7 @@ def setlevel(level): if level < logger.getEffectiveLevel(): logger.setLevel(level) + def liveplotting(lp, w=False): """Set whether or not to use live plotting. @@ -138,6 +145,7 @@ def liveplotting(lp, w=False): ### TracePeaks. Primary purpose is to enable creating movies. 
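Taken together, the helpers above configure console logging, an optional file log, and live plotting for the package. A short usage sketch; the file name is arbitrary, and passing a string level name to setfilelevel is an assumption based on setlevel accepting "info" in the test code:

    from diffpy.srmise import srmiselog

    srmiselog.setlevel("debug")             # stdout verbosity, keys of LEVELS
    srmiselog.addfilelog("extraction.log")  # mirror messages to a file
    srmiselog.setfilelevel("info")          # file handler may be less verbose
    srmiselog.liveplotting(False)           # no plot updates during extraction

Note that setfilelevel raises SrMiseLogError when called before addfilelog, since there is no file handler to adjust.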
### + class TracePeaks(object): """Output trace information during peak extraction.""" @@ -191,15 +199,20 @@ def maketrace(self, *args, **kwds): clusters.append(m.slice) for m in args[1:]: mc.replacepeaks(m.model) - return {"mc":mc, "clusters":clusters, "recursion":self.recursion, "counter":self.counter} + return { + "mc": mc, + "clusters": clusters, + "recursion": self.recursion, + "counter": self.counter, + } def writestr(self, trace): """Return string representation of current trace.""" lines = [] lines.append("### Trace") - lines.append("counter=%i" %trace["counter"]) - lines.append("recursion=%i" %trace["recursion"]) - lines.append("clusters=%s" %trace["clusters"]) + lines.append("counter=%i" % trace["counter"]) + lines.append("recursion=%i" % trace["recursion"]) + lines.append("clusters=%s" % trace["clusters"]) lines.append("### ModelCluster") lines.append(trace["mc"].writestr()) @@ -207,8 +220,8 @@ def writestr(self, trace): def write(self, trace): """Write current trace to file.""" - filename = "%s_%i" %(self.filebase, trace["counter"]) - f = open(filename, 'w') + filename = "%s_%i" % (self.filebase, trace["counter"]) + f = open(filename, "w") bytes = self.writestr(trace) f.write(bytes) f.close() @@ -225,12 +238,14 @@ def read(self, filename): "mc" - A ModelCluster instance "recursion" - The recursion level of mc""" try: - return self.readstr(open(filename,'rb').read()) + return self.readstr(open(filename, "rb").read()) except SrMiseDataFormatError as err: logger.exception("") basename = os.path.basename(filename) - emsg = ("Could not open '%s' due to unsupported file format " + - "or corrupted data. [%s]") % (basename, err) + emsg = ( + "Could not open '%s' due to unsupported file format " + + "or corrupted data. [%s]" + ) % (basename, err) raise SrMiseFileError(emsg) return None @@ -247,43 +262,49 @@ def readstr(self, datastring): "recursion" - The recursion level of mc""" # find where the ModelCluster section starts - res = re.search(r'^#+ ModelCluster\s*(?:#.*\s+)*', datastring, re.M) + res = re.search(r"^#+ ModelCluster\s*(?:#.*\s+)*", datastring, re.M) if res: - header = datastring[:res.start()] - mc = datastring[res.end():].strip() + header = datastring[: res.start()] + mc = datastring[res.end() :].strip() else: emsg = "Required section 'ModelCluster' not found." raise SrMiseDataFormatError(emsg) # instantiate ModelCluster - if re.match(r'^None$', mc): + if re.match(r"^None$", mc): mc = None else: from diffpy.srmise.modelcluster import ModelCluster + mc = ModelCluster.factory(mc) - res = re.search(r'^clusters=(.*)$', header, re.M) + res = re.search(r"^clusters=(.*)$", header, re.M) if res: clusters = eval(res.groups()[0].strip()) else: emsg = "Required field 'clusters' not found." raise SrMiseDataFormatError(emsg) - res = re.search(r'^recursion=(.*)$', header, re.M) + res = re.search(r"^recursion=(.*)$", header, re.M) if res: recursion = eval(res.groups()[0].strip()) else: emsg = "Required field 'recursion' not found." raise SrMiseDataFormatError(emsg) - res = re.search(r'^counter=(.*)$', header, re.M) + res = re.search(r"^counter=(.*)$", header, re.M) if res: counter = eval(res.groups()[0].strip()) else: emsg = "Required field 'counter' not found." 
raise SrMiseDataFormatError(emsg) - return {"mc":mc, "clusters":clusters, "recursion":self.recursion, "counter":self.counter} + return { + "mc": mc, + "clusters": clusters, + "recursion": self.recursion, + "counter": self.counter, + } def pushr(self): """Enter a layer of recursion, and return new level.""" @@ -315,16 +336,24 @@ def reset_trace(self): # filter property def setfilter(self, filter): - self.__filter = compile(" and ".join(["(%s)" %f for f in filter]), '', 'eval') - def getfilter(self): return self.__filter + self.__filter = compile( + " and ".join(["(%s)" % f for f in filter]), "", "eval" + ) + + def getfilter(self): + return self.__filter + filter = property(getfilter, setfilter) + ### End of class TracePeaks + def settracer(**kwds): global tracer tracer = TracePeaks(**kwds) return tracer + # Default tracer never emits tracer = settracer()
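setfilter above joins a list of expression strings into a single parenthesized conjunction and compiles it in "eval" mode. How emit() consumes the compiled filter is not shown in this patch, but a condition of that shape can be evaluated against a trace dictionary like the one maketrace() returns; a sketch under that assumption:

    # Same construction as setfilter: each entry becomes one parenthesized clause.
    flt = compile(
        " and ".join("(%s)" % f for f in ["counter > 5", "recursion == 1"]),
        "",
        "eval",
    )

    trace = {"counter": 7, "recursion": 1}  # e.g. a subset of maketrace() output
    print(eval(flt, {}, trace))  # True: both clauses hold for this trace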