diff --git a/.gitattributes b/.gitattributes
new file mode 100644
index 0000000..e7c3aea
--- /dev/null
+++ b/.gitattributes
@@ -0,0 +1 @@
+diffpy.srmise/_version.py export-subst
diff --git a/AUTHORS.rst b/AUTHORS.rst
new file mode 100644
index 0000000..9cef943
--- /dev/null
+++ b/AUTHORS.rst
@@ -0,0 +1,11 @@
+Authors
+=======
+
+Luke Granlund
+Billinge Group and community contributors.
+
+Contributors
+------------
+
+For a list of contributors, visit
+https://github.com/diffpy/diffpy.srmise/graphs/contributors
diff --git a/AUTHORS.txt b/AUTHORS.txt
deleted file mode 100644
index be8310a..0000000
--- a/AUTHORS.txt
+++ /dev/null
@@ -1,3 +0,0 @@
-SrMise (diffpy.srmise) authors:
-
-Luke Granlund
diff --git a/CHANGELOG.rst b/CHANGELOG.rst
new file mode 100644
index 0000000..2669451
--- /dev/null
+++ b/CHANGELOG.rst
@@ -0,0 +1,5 @@
+=============
+Release Notes
+=============
+
+.. current developments
diff --git a/CODE_OF_CONDUCT.rst b/CODE_OF_CONDUCT.rst
new file mode 100644
index 0000000..ff9c356
--- /dev/null
+++ b/CODE_OF_CONDUCT.rst
@@ -0,0 +1,133 @@
+=====================================
+ Contributor Covenant Code of Conduct
+=====================================
+
+Our Pledge
+----------
+
+We as members, contributors, and leaders pledge to make participation in our
+community a harassment-free experience for everyone, regardless of age, body
+size, visible or invisible disability, ethnicity, sex characteristics, gender
+identity and expression, level of experience, education, socio-economic status,
+nationality, personal appearance, race, caste, color, religion, or sexual
+identity and orientation.
+
+We pledge to act and interact in ways that contribute to an open, welcoming,
+diverse, inclusive, and healthy community.
+
+Our Standards
+-------------
+
+Examples of behavior that contributes to a positive environment for our
+community include:
+
+* Demonstrating empathy and kindness toward other people
+* Being respectful of differing opinions, viewpoints, and experiences
+* Giving and gracefully accepting constructive feedback
+* Accepting responsibility and apologizing to those affected by our mistakes,
+  and learning from the experience
+* Focusing on what is best not just for us as individuals, but for the overall
+  community
+
+Examples of unacceptable behavior include:
+
+* The use of sexualized language or imagery, and sexual attention or advances of
+  any kind
+* Trolling, insulting or derogatory comments, and personal or political attacks
+* Public or private harassment
+* Publishing others' private information, such as a physical or email address,
+  without their explicit permission
+* Other conduct which could reasonably be considered inappropriate in a
+  professional setting
+
+Enforcement Responsibilities
+----------------------------
+
+Community leaders are responsible for clarifying and enforcing our standards of
+acceptable behavior and will take appropriate and fair corrective action in
+response to any behavior that they deem inappropriate, threatening, offensive,
+or harmful.
+
+Community leaders have the right and responsibility to remove, edit, or reject
+comments, commits, code, wiki edits, issues, and other contributions that are
+not aligned to this Code of Conduct, and will communicate reasons for moderation
+decisions when appropriate.
+
+Scope
+-----
+
+This Code of Conduct applies within all community spaces, and also applies when
+an individual is officially representing the community in public spaces.
+Examples of representing our community include using an official email address,
+posting via an official social media account, or acting as an appointed
+representative at an online or offline event.
+
+Enforcement
+-----------
+
+Instances of abusive, harassing, or otherwise unacceptable behavior may be
+reported to the community leaders responsible for enforcement at
+sb2896@columbia.edu. All complaints will be reviewed and investigated promptly and fairly.
+
+All community leaders are obligated to respect the privacy and security of the
+reporter of any incident.
+
+Enforcement Guidelines
+----------------------
+
+Community leaders will follow these Community Impact Guidelines in determining
+the consequences for any action they deem in violation of this Code of Conduct:
+
+1. Correction
+****************
+
+**Community Impact**: Use of inappropriate language or other behavior deemed
+unprofessional or unwelcome in the community.
+
+**Consequence**: A private, written warning from community leaders, providing
+clarity around the nature of the violation and an explanation of why the
+behavior was inappropriate. A public apology may be requested.
+
+2. Warning
+*************
+
+**Community Impact**: A violation through a single incident or series of
+actions.
+
+**Consequence**: A warning with consequences for continued behavior. No
+interaction with the people involved, including unsolicited interaction with
+those enforcing the Code of Conduct, for a specified period of time. This
+includes avoiding interactions in community spaces as well as external channels
+like social media. Violating these terms may lead to a temporary or permanent
+ban.
+
+3. Temporary Ban
+******************
+
+**Community Impact**: A serious violation of community standards, including
+sustained inappropriate behavior.
+
+**Consequence**: A temporary ban from any sort of interaction or public
+communication with the community for a specified period of time. No public or
+private interaction with the people involved, including unsolicited interaction
+with those enforcing the Code of Conduct, is allowed during this period.
+Violating these terms may lead to a permanent ban.
+
+4. Permanent Ban
+******************
+
+**Community Impact**: Demonstrating a pattern of violation of community
+standards, including sustained inappropriate behavior, harassment of an
+individual, or aggression toward or disparagement of classes of individuals.
+
+**Consequence**: A permanent ban from any sort of public interaction within the
+community.
+
+Attribution
+-----------
+
+This Code of Conduct is adapted from the `Contributor Covenant <https://www.contributor-covenant.org/version/2/1/code_of_conduct.html>`_.
+
+Community Impact Guidelines were inspired by `Mozilla's code of conduct enforcement ladder <https://github.com/mozilla/diversity>`_.
+
+For answers to common questions about this code of conduct, see the `FAQ <https://www.contributor-covenant.org/faq>`_. `Translations are available <https://www.contributor-covenant.org/translations>`_.
diff --git a/LICENSE.rst b/LICENSE.rst
new file mode 100644
index 0000000..7c8f8d0
--- /dev/null
+++ b/LICENSE.rst
@@ -0,0 +1,44 @@
+BSD 3-Clause License
+
+Copyright 2014-2015, Board of Trustees of Michigan State University
+          2016-2024, The Trustees of Columbia University in the City of New York.
+All rights reserved.
+
+If you use this program to do productive scientific research that
+leads to publication, we ask that you acknowledge use of the
+program by citing the following paper in your publication:
+
+   L. Granlund, S.J.L. Billinge, P.M. Duxbury, Algorithm for
+   systematic peak extraction from atomic pair distribution
+   functions, Acta Crystallographica A 71(4), 392-409 (2015).
+ doi:10.1107/S2053273315005276 + +For more information please visit the diffpy web-page at + http://www.diffpy.org +or email Luke Granlund at luke.r.granlund@gmail.com, or Prof. Simon +Billinge at sb2896@columbia.edu. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +1. Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its contributors + may be used to endorse or promote products derived from this software + without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/LICENSE.txt b/LICENSE.txt deleted file mode 100644 index 23b3331..0000000 --- a/LICENSE.txt +++ /dev/null @@ -1,46 +0,0 @@ -If you use this program to do productive scientific research that -leads to publication, we ask that you acknowledge use of the -program by citing the following paper in your publication: - - L. Granlund, S.J.L. Billinge, P.M. Duxbury, Algorithm for - systematic peak extraction from atomic pair distribution - functions, Acta Crystallographica A 71(4), 392-409 (2015). - doi:10.1107/S2053273315005276 - -Copyright 2014-2015, Board of Trustees of Michigan State University - -For more information please visit the diffpy web-page at - http://www.diffpy.org -or email Luke Granlund at luke.r.granlund@gmail.com, or Prof. Simon -Billinge at sb2896@columbia.edu. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: - - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - * Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - * Neither the name of the copyright holder nor the names of its - contributors may be used to endorse or promote products derived from - this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY COPYRIGHT HOLDER "AS IS". 
COPYRIGHT -HOLDER EXPRESSLY DISCLAIMS ANY AND ALL WARRANTIES AND CONDITIONS, -EITHER EXPRESS OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY, TITLE, FITNESS, ADEQUACY OR -SUITABILITY FOR A PARTICULAR PURPOSE, AND ANY WARRANTIES OF FREEDOM -FROM INFRINGEMENT OF ANY DOMESTIC OR FOREIGN PATENT, COPYRIGHTS, -TRADE SECRETS OR OTHER PROPRIETARY RIGHTS OF ANY PARTY. IN NO EVENT -SHALL COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR ANY DIRECT, -INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES -(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) -HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, -STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE OR RELATING TO -THIS AGREEMENT, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/LICENSE_PDFgui.rst b/LICENSE_PDFgui.rst new file mode 100644 index 0000000..3ba9b69 --- /dev/null +++ b/LICENSE_PDFgui.rst @@ -0,0 +1,51 @@ +BSD 3-Clause License + +Copyright 2006-2007, Board of Trustees of Michigan State University + 2008-2024, The Trustees of Columbia University in the City of New York. +All rights reserved. + +SrMise incorporates source code from diffpy.pdfgui in the file +pdfdataset.py. The PDFgui license is reproduced in full below. + +This program is part of the DiffPy and DANSE open-source projects +and is available subject to the conditions and terms laid out +below. + +If you use this program to do productive scientific research that +leads to publication, we ask that you acknowledge use of the +program by citing the following paper in your publication: + + C. L. Farrow, P. Juhas, J. W. Liu, D. Bryndin, E. S. Bozin, + J. Bloch, Th. Proffen and S. J. L. Billinge, PDFfit2 and + PDFgui: computer programs for studying nanostructure in + crystals, J. Phys.: Condens. Matter 19, 335219 (2007) + +For more information please visit the diffpy web-page at + http://www.diffpy.org +or email Luke Granlund at luke.r.granlund@gmail.com, or Prof. Simon +Billinge at sb2896@columbia.edu. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +1. Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its contributors + may be used to endorse or promote products derived from this software + without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/LICENSE_PDFgui.txt b/LICENSE_PDFgui.txt deleted file mode 100644 index 4e56059..0000000 --- a/LICENSE_PDFgui.txt +++ /dev/null @@ -1,56 +0,0 @@ -SrMise incorporates source code from diffpy.pdfgui in the file -pdfdataset.py. The PDFgui license is reproduced in full below. -=================================================================== - -This program is part of the DiffPy and DANSE open-source projects -and is available subject to the conditions and terms laid out -below. - -If you use this program to do productive scientific research that -leads to publication, we ask that you acknowledge use of the -program by citing the following paper in your publication: - - C. L. Farrow, P. Juhas, J. W. Liu, D. Bryndin, E. S. Bozin, - J. Bloch, Th. Proffen and S. J. L. Billinge, PDFfit2 and - PDFgui: computer programs for studying nanostructure in - crystals, J. Phys.: Condens. Matter 19, 335219 (2007) - -Copyright 2006-2007, Board of Trustees of Michigan State -University, Copyright 2008-2009, Board of Trustees of Columbia -University in the city of New York. (Copyright holder indicated in -each source file). - -For more information please visit the project web-page: - http://www.diffpy.org/ -or email Prof. Simon Billinge at sb2896@columbia.edu - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: - - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - * Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - * Neither the name of the copyright holder nor the names of its - contributors may be used to endorse or promote products derived from - this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY COPYRIGHT HOLDER "AS IS". COPYRIGHT -HOLDER EXPRESSLY DISCLAIMS ANY AND ALL WARRANTIES AND CONDITIONS, -EITHER EXPRESS OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY, TITLE, FITNESS, ADEQUACY OR -SUITABILITY FOR A PARTICULAR PURPOSE, AND ANY WARRANTIES OF FREEDOM -FROM INFRINGEMENT OF ANY DOMESTIC OR FOREIGN PATENT, COPYRIGHTS, -TRADE SECRETS OR OTHER PROPRIETARY RIGHTS OF ANY PARTY. IN NO EVENT -SHALL COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR ANY DIRECT, -INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES -(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) -HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, -STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE OR RELATING TO -THIS AGREEMENT, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
diff --git a/devutils/makesdist b/devutils/makesdist
index 1d81cf0..380f9e1 100644
--- a/devutils/makesdist
+++ b/devutils/makesdist
@@ -1,8 +1,8 @@
 #!/usr/bin/env python
-'''Create source distribution tar.gz archive, where each file belongs
+"""Create source distribution tar.gz archive, where each file belongs
 to a root user and modification time is set to the git commit time.
-'''
+"""
 
 import sys
 import os
@@ -17,17 +17,18 @@ sys.path.insert(0, BASEDIR)
 from setup import versiondata
 timestamp = versiondata.getint('DEFAULT', 'timestamp')
 
-print 'Run "setup.py sdist --formats=tar"',
+print('Run "setup.py sdist --formats=tar"', end=" ")
 cmd_sdist = [sys.executable] + 'setup.py sdist --formats=tar'.split()
 ec = subprocess.call(cmd_sdist, cwd=BASEDIR, stdout=open(os.devnull, 'w'))
 if ec: sys.exit(ec)
-print "[done]"
+print("[done]")
 
 tarname = max(glob.glob(BASEDIR + '/dist/*.tar'), key=os.path.getmtime)
 tfin = tarfile.open(tarname)
 tfout = tarfile.open(tarname + '.gz', 'w:gz')
 
+
 def fixtarinfo(tinfo):
     tinfo.uid = tinfo.gid = 0
     tinfo.uname = tinfo.gname = 'root'
@@ -36,8 +37,8 @@ def fixtarinfo(tinfo):
     return tinfo
 
 
-print 'Filter %s --> %s.gz' % (2 * (os.path.basename(tarname),)),
+print('Filter %s --> %s.gz' % (2 * (os.path.basename(tarname),)), end=" ")
 for ti in tfin:
     tfout.addfile(fixtarinfo(ti), tfin.extractfile(ti))
 
 os.remove(tarname)
-print "[done]"
+print("[done]")
diff --git a/devutils/prep.py b/devutils/prep.py
index c63ce0f..9eee5e8 100644
--- a/devutils/prep.py
+++ b/devutils/prep.py
@@ -6,7 +6,9 @@ import re
 import sys
 
-__basedir__ = os.getcwdu()
+__basedir__ = os.getcwd()
+
+from numpy.compat import unicode
 
 # Example imports
 
@@ -18,20 +20,21 @@ def __init__(self):
 
     def test(self, call, *args, **kwds):
         m = sys.modules[call.__module__]
-        testname = m.__name__+'.'+call.__name__
+        testname = m.__name__ + "." + call.__name__
         path = os.path.dirname(m.__file__)
         os.chdir(path)
         try:
             call(*args, **kwds)
-            self.messages.append("%s: success" %testname)
-        except Exception, e:
-            self.messages.append("%s: error, details below.\n%s" %(testname, e))
+            self.messages.append("%s: success" % testname)
+        except Exception as e:
+            self.messages.append("%s: error, details below.\n%s" % (testname, e))
         finally:
             os.chdir(__basedir__)
 
     def report(self):
-        print '==== Results of Tests ===='
-        print '\n'.join(self.messages)
+        print("==== Results of Tests ====")
+        print("\n".join(self.messages))
+
 
 def scrubeol(directory, filerestr):
     """Use unix-style endlines for files in directory matched by regex string.
@@ -50,11 +53,11 @@ def scrubeol(directory, filerestr):
         text = unicode(original.read())
         original.close()
 
-        updated = io.open(f, 'w', newline='\n')
+        updated = io.open(f, "w", newline="\n")
         updated.write(text)
         updated.close()
 
-        print "Updated %s to unix-style endlines." %f
+        print("Updated %s to unix-style endlines." % f)
 
 
 def rm(directory, filerestr):
@@ -72,30 +75,31 @@ def rm(directory, filerestr):
 
     for f in files:
         os.remove(f)
-        print "Deleted %s." %f
-
+        print("Deleted %s." % f)
 
 if __name__ == "__main__":
 
     # Temporarily add examples to path
-    lib_path = os.path.abspath(os.path.join('..','doc','examples'))
+    lib_path = os.path.abspath(os.path.join("..", "doc", "examples"))
     sys.path.append(lib_path)
 
     # Delete existing files that don't necessarily have a fixed name.
     rm("../doc/examples/output", r"known_dG.*\.pwa")
     rm("../doc/examples/output", r"unknown_dG.*\.pwa")
 
-    ### Testing examples
+    # Testing examples
     examples = Test()
-    test_names = ["extract_single_peak",
-                  "parameter_summary",
-                  "fit_initial",
-                  "query_results",
-                  "multimodel_known_dG1",
-                  "multimodel_known_dG2",
-                  "multimodel_unknown_dG1",
-                  "multimodel_unknown_dG2"]
+    test_names = [
+        "extract_single_peak",
+        "parameter_summary",
+        "fit_initial",
+        "query_results",
+        "multimodel_known_dG1",
+        "multimodel_known_dG2",
+        "multimodel_unknown_dG1",
+        "multimodel_unknown_dG2",
+    ]
 
     test_modules = []
     for test in test_names:
@@ -106,10 +110,8 @@ def rm(directory, filerestr):
 
     examples.report()
 
-    ### Convert output of example files to Unix-style endlines for sdist.
-    if os.linesep != '\n':
-        print "==== Scrubbing Endlines ===="
+    # Convert output of example files to Unix-style endlines for sdist.
+    if os.linesep != "\n":
+        print("==== Scrubbing Endlines ====")
         # All *.srmise and *.pwa files in examples directory.
         scrubeol("../doc/examples/output", r".*(\.srmise|\.pwa)")
-
-
diff --git a/diffpy/__init__.py b/diffpy/__init__.py
deleted file mode 100644
index 7170a04..0000000
--- a/diffpy/__init__.py
+++ /dev/null
@@ -1,26 +0,0 @@
-#!/usr/bin/env python
-########################################################################
-#
-# diffpy            by DANSE Diffraction group
-#                   Simon J. L. Billinge
-#                   (c) 2008 The Trustees of Columbia University
-#                   in the City of New York.  All rights reserved.
-#
-# File coded by:    Pavol Juhas
-#
-# See AUTHORS.txt for a list of people who contributed.
-# See LICENSE_DANSE.txt for license information.
-#
-########################################################################
-
-
-"""diffpy - tools for structure analysis by diffraction.
-
-Blank namespace package.
-"""
-
-
-__import__("pkg_resources").declare_namespace(__name__)
-
-
-# End of file
diff --git a/diffpy/srmise/__init__.py b/diffpy/srmise/__init__.py
deleted file mode 100644
index 532c767..0000000
--- a/diffpy/srmise/__init__.py
+++ /dev/null
@@ -1,40 +0,0 @@
-#!/usr/bin/env python
-##############################################################################
-#
-# SrMise            by Luke Granlund
-#                   (c) 2014 trustees of the Michigan State University.
-#                   All rights reserved.
-#
-# File coded by:    Luke Granlund
-#
-# See LICENSE.txt for license information.
-# -############################################################################## - -"""Tools for peak extraction from PDF.""" - -__all__ = [ - "basefunction", - "srmiseerrors", - "srmiselog", - "dataclusters", - "modelcluster", - "modelparts", - "pdfdataset", - "pdfpeakextraction", - "peakextraction", - "peakstability", - "multimodelselection", -] - -from basefunction import BaseFunction -from dataclusters import DataClusters -from modelcluster import ModelCluster, ModelCovariance -from modelparts import ModelPart, ModelParts -from multimodelselection import MultimodelSelection -from pdfdataset import PDFDataSet -from pdfpeakextraction import PDFPeakExtraction -from peakextraction import PeakExtraction -from peakstability import PeakStability - -from diffpy.srmise.version import __version__ diff --git a/diffpy/srmise/applications/extract.py b/diffpy/srmise/applications/extract.py deleted file mode 100755 index 5bbc646..0000000 --- a/diffpy/srmise/applications/extract.py +++ /dev/null @@ -1,508 +0,0 @@ -#!/usr/bin/env python -############################################################################## -# -# SrMise by Luke Granlund -# (c) 2014-2015 trustees of the Michigan State University. -# All rights reserved. -# -# File coded by: Luke Granlund -# -# See LICENSE.txt for license information. -# -############################################################################## - -from optparse import OptionGroup, OptionParser - -import matplotlib.pyplot as plt -import numpy as np - - -def main(): - """Default SrMise entry-point.""" - - usage = ("usage: %prog pdf_file [options]\n" - "pdf_file is a file containing a PDF (accepts several " - "common formats), or a .srmise file.") - - from diffpy.srmise import __version__ - version = "diffpy.srmise "+__version__ - - descr = ("The SrMise package is a tool to aid extracting and fitting peaks " - "that comprise a pair distribution function. This script exposes " - "basic peak extraction functionality. For many PDFs it is " - "sufficient to specify the range, baseline, and sometimes an ad " - "hoc uncertainty. See the discussion of these options below for " - "further guidance.") - - epilog = ("Options set above override those from an existing .srmise " - "file, as well as the usual defaults summarized here.\n\n" - "Defaults (when qmax > 0)\n" - "------------------------\n" - "baseline - None (identically 0).\n" - "dg - The uncertainty reported in the PDF (if any), otherwise " - "5% of maximum value of PDF.\n" - "nyquist - True\n" - "range - All the data\n" - "cres - The Nyquist rate.\n" - "supersample - 4.0\n" - "scale - (Deprecated) False\n\n" - "Defaults (when qmax = 0)\n" - "------------------------\n" - "baseline - as above\n" - "dg - as above\n" - "nyquist - False (and no effect if True)\n" - "range - as above\n" - "cres - Four times the average distance between data points\n" - "supersample - Parameter has no effect.\n" - "scale - (Deprecated) False, and no effect if True\n\n" - "Known issues\n" - "------------\n" - "1) Peak extraction works best when the data are moderately " - "oversampled first. When qmax > 0 this is handled " - "automatically, but when qmax = 0 no resampling of any kind is " - "performed.\n" - "2) Peak extraction performed on a PDF file and a .srmise file " - "derived from that data with identical extraction parameters " - "can give different results even on the same platform. This is " - "because the original data may undergo some processing before it " - "can be saved by SrMise. 
For consistent results, always specify " - "the original PDF, or always load the PDF from a .srmise file " - "you save before performing any peak extraction on that data.\n" - "3) Liveplotting depends on the matplotlib backend, and doesn't " - "implement an idle handler, so interaction with its window will " - "likely cause a freeze.") - - # TODO: Move to argparse (though not in 2.6 by default) to handle - # variable-length options without callbacks. Longterm, the major - # value is using the same option to specify a baseline that should - # use estimation vs. one that should use explicitly provided pars. - parser = OptionParser(usage=usage, description=descr, epilog=epilog, - version=version, - formatter=IndentedHelpFormatterWithNL()) - - parser.set_defaults(plot=False, liveplot=False, wait=False, - performextraction=True, verbosity="warning") - dg_defaults = {'absolute':None, 'data':None, 'max-fraction':.05, - 'ptp-fraction':.05, 'dG-fraction':1.} - - parser.add_option("--extract", action="store_true", - dest="performextraction", - help="[Default] Perform extraction.") - parser.add_option("--no-extract", action="store_false", - dest="performextraction", - help="Do not perform extraction.") - parser.add_option("--range", nargs=2, dest="rng", type="float", - metavar="rmin rmax", - help="Extract over the range (rmin, rmax).") - parser.add_option("--qmax", dest="qmax", type="string", metavar="QMAX", - help="Model peaks with this maximum q value.") - parser.add_option("--nyquist", action="store_true", dest="nyquist", - help="Use Nyquist resampling if qmax > 0.") - parser.add_option("--no-nyquist", action="store_false", dest="nyquist", - help="Do not use Nyquist resampling.") - parser.add_option("--pf", dest="peakfunction", metavar="PF", - help="Fit peak function PF defined in " - "diffpy.srmise.peaks, e.g. " - "'GaussianOverR(maxwidth=0.7)'") - parser.add_option("--cres", dest="cres", type="float", metavar="cres", - help="Clustering resolution.") - parser.add_option("--supersample", dest="supersample", type="float", - metavar="SS", - help="Minimum initial oversampling rate as multiple of " - "Nyquist rate.") - parser.add_option("--me", "-m", dest="modelevaluator", metavar="ME", - help="ModelEvaluator defined in " - "diffpy.srmise.modelevaluators, e.g. 'AIC'") - - group = OptionGroup(parser, "Baseline Options", - "SrMise cannot determine the appropriate type of " - "baseline (e.g. crystalline vs. some nanoparticle) " - "solely from the data, so the user should specify the " - "appropriate type and/or parameters. (Default is " - "identically 0, which is unphysical.) SrMise keeps the " - "PDF baseline fixed at its initial value until the " - "final stages of peak extraction, so results are " - "frequently conditioned on that choice. (See the " - "SrMise documentation for details.) A good estimate " - "is therefore important for best results. SrMise can " - "estimate initial parameters from the data for linear " - "baselines in some situations (all peaks are positive, " - "and the degree of overlap in the region of extraction " - "is not too great), but in most cases it is best to " - "provide reasonable initial parameters. Run 'srmise " - "pdf_file.gr [baseline_option] --no-extract --plot' " - "for different values of the parameters for rapid " - "visual estimation.") - group.add_option("--baseline", dest="baseline", metavar="BL", - help="Estimate baseline from baseline function BL " - "defined in diffpy.srmise.baselines, e.g. " - "'Polynomial(degree=1)'. All parameters are free. 
" - "(Many POSIX shells attempt to interpret the " - "parentheses, and on these shells the option should " - "be surrounded by quotation marks.)" ) - group.add_option("--bcrystal", dest="bcrystal", type="string", - metavar="rho0[c]", - help="Use linear baseline defined by crystal number " - "density rho0. Append 'c' to make parameter " - "constant. Equivalent to " - "'--bpoly1 -4*pi*rho0[c] 0c'.") - group.add_option("--bsrmise", dest="bsrmise", type="string", metavar="file", - help="Use baseline from specified .srmise file.") - group.add_option("--bpoly0", dest="bpoly0", type="string", metavar="a0[c]", - help="Use constant baseline given by y=a0. " - "Append 'c' to make parameter constant.") - group.add_option("--bpoly1", dest="bpoly1", type="string", nargs=2, - metavar="a1[c] a0[c]", - help="Use baseline given by y=a1*x + a0. Append 'c' to " - "make parameter constant.") - group.add_option("--bpoly2", dest="bpoly2", type="string", nargs=3, - metavar="a2[c] a1[c] a0[c]", - help="Use baseline given by y=a2*x^2+a1*x + a0. Append " - "'c' to make parameter constant.") - group.add_option("--bseq", dest="bseq", type="string", metavar="FILE", - help="Use baseline interpolated from x,y values in FILE. " - "This baseline has no free parameters.") - group.add_option("--bspherical", dest="bspherical", type="string", nargs=2, - metavar="s[c] r[c]", - help="Use spherical nanoparticle baseline with scale s " - "and radius r. Append 'c' to make parameter " - "constant.") - parser.add_option_group(group) - - - group = OptionGroup(parser, "Uncertainty Options", - "Ideally a PDF reports the accurate experimentally " - "determined uncertainty. In practice, many PDFs " - "report none, while for others the reported values " - "are not necessarily reliable. (If in doubt, ask your " - "friendly neighborhood diffraction expert!) Even when " - "uncertainties are accurate, it can be " - "pragmatically useful to see how the results of " - "peak extraction change when assuming a different " - "value. Nevertheless, the primary determinant of " - "model complexity in SrMise is the uncertainty, so an " - "ad hoc uncertainty yields ad hoc model complexity. " - "See the SrMise documentation for further discussion, " - "including methods to mitigate this issue with " - "multimodel selection.") - group.add_option("--dg-mode", dest="dg_mode", type="choice", - choices=['absolute', 'data', 'max-fraction', - 'ptp-fraction', 'dG-fraction'], - help="Define how values passed to '--dg' are treated. " - "Possible values are: \n" - "'absolute' - The actual uncertainty in the PDF.\n" - "'max-fraction' - Fraction of max value in PDF.\n" - "'ptp-fraction' - Fraction of max minus min value " - "in the PDF.\n" - "'dG-fraction' - Fraction of dG reported by PDF.\n" - "If '--dg' is specified but mode is not, then mode " - "ia absolute. Otherwise, 'dG-fraction' is default " - "if the PDF reports uncertaintes, and 'max-fraction' " - "ia default if it does not.") - group.add_option("--dg", dest="dg", type="float", - help="Perform extraction assuming uncertainty dg. " - "Defaults depend on --dg-mode as follows:\n" - "'absolute'=%s\n" - "'max-fraction'=%s\n" - "'ptp-fraction'=%s\n" - "'dG-fraction'=%s" %(dg_defaults['absolute'], - dg_defaults['max-fraction'], - dg_defaults['ptp-fraction'], - dg_defaults['dG-fraction'])) -# group.add_option("--multimodel", nargs=3, dest="multimodel", type="float", -# metavar="dg_min dg_max n", -# help="Generate n models from dg_min to dg_max (given by " -# "--dg-mode) and perform multimodel analysis. 
" -# "This overrides any value given for --dg") - parser.add_option_group(group) - - - group = OptionGroup(parser, "Saving and Plotting Options", - "") - group.add_option("--pwa", dest="pwafile", metavar="FILE", - help="Save summary of result to FILE (.pwa format).") - group.add_option("--save", dest="savefile", metavar="FILE", - help="Save result of extraction to FILE (.srmise " - "format).") - group.add_option("--plot", "-p", action="store_true", dest="plot", - help="Plot extracted peaks.") - group.add_option("--liveplot", "-l", action="store_true", dest="liveplot", - help="(Experimental) Plot extracted peaks when fitting.") - group.add_option("--wait", "-w", action="store_true", dest="wait", - help="(Experimental) When using liveplot wait for user " - "after plotting.") - parser.add_option_group(group) - - - group = OptionGroup(parser, "Verbosity Options", - "Control detail printed to console.") - group.add_option("--informative", "-i", action="store_const", const="info", - dest="verbosity", - help="Summary of progress.") - group.add_option("--quiet", "-q", action="store_const", const="warning", - dest="verbosity", - help="[Default] Show minimal summary.") - group.add_option("--silent", "-s", action="store_const", const="critical", - dest="verbosity", - help="No non-critical output.") - group.add_option("--verbose", "-v", action="store_const", const="debug", - dest="verbosity", - help="Show verbose output.") - parser.add_option_group(group) - - group = OptionGroup(parser, "Deprecated Options", - "Not for general use.") - group.add_option("--scale", action="store_true", dest="scale", - help="(Deprecated) Scale supersampled uncertainties by " - "sqrt(oversampling) in intermediate steps when " - "Nyquist sampling.") - group.add_option("--no-scale", action="store_false", dest="scale", - help="(Deprecated) Never rescale uncertainties.") - parser.add_option_group(group) - - - (options, args) = parser.parse_args() - - if len(args) != 1: - parser.error("Exactly one argument required. \n"+usage) - - - from diffpy.srmise import srmiselog - srmiselog.setlevel(options.verbosity) - - from diffpy.srmise.pdfpeakextraction import PDFPeakExtraction - from diffpy.srmise.srmiseerrors import SrMiseDataFormatError, SrMiseFileError - - if options.peakfunction is not None: - from diffpy.srmise import peaks - try: - options.peakfunction = eval("peaks."+options.peakfunction) - except Exception, err: - print err - print "Could not create peak function '%s'. Exiting." \ - %options.peakfunction - return - - if options.modelevaluator is not None: - from diffpy.srmise import modelevaluators - try: - options.modelevaluator = \ - eval("modelevaluators."+options.modelevaluator) - except Exception, err: - print err - print "Could not find ModelEvaluator '%s'. Exiting." 
\ - %options.modelevaluator - return - - if options.bcrystal is not None: - from diffpy.srmise.baselines import Polynomial - bl = Polynomial(degree=1) - options.baseline = parsepars(bl, [options.bcrystal, '0c']) - options.baseline.pars[0] = -4*np.pi*options.baseline.pars[0] - elif options.bsrmise is not None: - # use baseline from existing file - blext = PDFPeakExtraction() - blext.read(options.bsrmise) - options.baseline = blext.extracted.baseline - elif options.bpoly0 is not None: - from diffpy.srmise.baselines import Polynomial - bl = Polynomial(degree=0) - options.baseline = parsepars(bl, [options.bpoly0]) - elif options.bpoly1 is not None: - from diffpy.srmise.baselines import Polynomial - bl = Polynomial(degree=1) - options.baseline = parsepars(bl, options.bpoly1) - elif options.bpoly2 is not None: - from diffpy.srmise.baselines import Polynomial - bl = Polynomial(degree=2) - options.baseline = parsepars(bl, options.bpoly2) - elif options.bseq is not None: - from diffpy.srmise.baselines import FromSequence - bl = FromSequence(options.bseq) - options.baseline = bl.actualize([], "internal") - elif options.bspherical is not None: - from diffpy.srmise.baselines import NanoSpherical - bl = NanoSpherical() - options.baseline = parsepars(bl, options.bspherical) - elif options.baseline is not None: - from diffpy.srmise import baselines - try: - options.baseline = eval("baselines."+options.baseline) - except Exception, err: - print err - print "Could not create baseline '%s'. Exiting." %options.baseline - return - - filename = args[0] - - if filename is not None: - ext = PDFPeakExtraction() - try: - ext.read(filename) - except (SrMiseDataFormatError, SrMiseFileError, Exception): - ext.loadpdf(filename) - - pdict = {} - if options.peakfunction is not None: - pdict["pf"] = [options.peakfunction] - if options.baseline is not None: - pdict["baseline"] = options.baseline - if options.cres is not None: - pdict["cres"] = options.cres - if options.dg_mode is None: - if options.dg is not None: - options.dg_mode = "absolute" - elif ext.dy is None: - options.dg_mode = "max-fraction" - else: - options.dg_mode = "dG-fraction" - if options.dg is None: - options.dg = dg_defaults[options.dg_mode] - if options.dg_mode == "absolute": - pdict["effective_dy"] = options.dg*np.ones(len(ext.x)) - elif options.dg_mode == "max-fraction": - pdict["effective_dy"] = options.dg*ext.y.max()*np.ones(len(ext.x)) - elif options.dg_mode == "ptp-fraction": - pdict["effective_dy"] = options.dg*ext.y.ptp()*np.ones(len(ext.y)) - elif options.dg_mode == "dG-fraction": - pdict["effective_dy"] = options.dg*ext.dy - if options.rng is not None: - pdict["rng"] = list(options.rng) - if options.qmax is not None: - pdict["qmax"] = options.qmax if options.qmax == "automatic" else float(options.qmax) - if options.nyquist is not None: - pdict["nyquist"] = options.nyquist - if options.supersample is not None: - pdict["supersample"] = options.supersample - if options.scale is not None: - pdict["scale"] = options.scale - if options.modelevaluator is not None: - pdict["error_method"] = options.modelevaluator - - if options.liveplot: - from diffpy.srmise import srmiselog - srmiselog.liveplotting(True, options.wait) - - ext.setvars(**pdict) - cov = None - if options.performextraction: - cov = ext.extract() - out = ext.extracted - - if options.savefile is not None: - try: - ext.write(options.savefile) - except SrMiseFileError, err: - print err - print "Could not save result to '%s'." 
%options.savefile - - - if options.pwafile is not None: - try: - ext.writepwa(options.pwafile) - except SrMiseFileError, err: - print err - print "Could not save pwa summary to '%s'." %options.pwafile - - - print ext - if cov is not None: - print cov - - if options.plot: - from diffpy.srmise.applications.plot import makeplot - makeplot(ext) - plt.show() - elif options.liveplot: - plt.show() - -def parsepars(mp, parseq): - """Return actualized model from sequence of strings. - - Each item in parseq must be interpretable as a float, or as - a float with the character 'c' appended. If 'c' is appended, - that parameter will be fixed. - - Parameters: - mp - A ModelPart instance - parseq - A sequence of string - """ - pars = [] - free = [] - for p in parseq: - if p[-1] == 'c': - pars.append(float(p[0:-1])) - free.append(False) - else: - pars.append(float(p)) - free.append(True) - return mp.actualize(pars, "internal", free=free) - - -### Class to preserve newlines in optparse -# Borrowed, with minor changes, from -# http://groups.google.com/group/comp.lang.python/browse_frm/thread/6df6e6b541a15bc2/09f28e26af0699b1 - -import textwrap -from optparse import IndentedHelpFormatter - - -class IndentedHelpFormatterWithNL(IndentedHelpFormatter): - def _format_text(self, text): - if not text: return "" - text_width = self.width - self.current_indent - indent = " "*self.current_indent -# the above is still the same - bits = text.split('\n') - formatted_bits = [ - textwrap.fill(bit, - text_width, - initial_indent=indent, - subsequent_indent=indent) - for bit in bits] - result = "\n".join(formatted_bits) + "\n" - return result - - def format_option(self, option): - # The help for each option consists of two parts: - # * the opt strings and metavars - # eg. ("-x", or "-fFILENAME, --file=FILENAME") - # * the user-supplied help string - # eg. ("turn on expert mode", "read data from FILENAME") - # - # If possible, we write both of these on the same line: - # -x turn on expert mode - # - # But if the opt string list is too long, we put the help - # string on a second line, indented to the same column it would - # start in if it fit on the first line. - # -fFILENAME, --file=FILENAME - # read data from FILENAME - result = [] - opts = self.option_strings[option] - opt_width = self.help_position - self.current_indent - 2 - if len(opts) > opt_width: - opts = "%*s%s\n" % (self.current_indent, "", opts) - indent_first = self.help_position - else: # start help on same line as opts - opts = "%*s%-*s " % (self.current_indent, "", opt_width, opts) - indent_first = 0 - result.append(opts) - if option.help: - help_text = self.expand_default(option) -# Everything is the same up through here - help_lines = [] - for para in help_text.split("\n"): - help_lines.extend(textwrap.wrap(para, self.help_width)) -# Everything is the same after here - result.append("%*s%s\n" % ( - indent_first, "", help_lines[0])) - result.extend(["%*s%s\n" % (self.help_position, "", line) - for line in help_lines[1:]]) - elif opts[-1] != "\n": - result.append("\n") - return "".join(result) -### End class - -if __name__ == "__main__": - main() diff --git a/diffpy/srmise/baselines/base.py b/diffpy/srmise/baselines/base.py deleted file mode 100644 index d84160a..0000000 --- a/diffpy/srmise/baselines/base.py +++ /dev/null @@ -1,189 +0,0 @@ -#!/usr/bin/env python -############################################################################## -# -# SrMise by Luke Granlund -# (c) 2014 trustees of the Michigan State University. -# All rights reserved. 
-# -# File coded by: Luke Granlund -# -# See LICENSE.txt for license information. -# -############################################################################## - -import logging - -import numpy as np - -import diffpy.srmise.srmiselog -from diffpy.srmise.basefunction import BaseFunction -from diffpy.srmise.modelparts import ModelPart -from diffpy.srmise.srmiseerrors import * - -logger = logging.getLogger("diffpy.srmise") - - -class BaselineFunction(BaseFunction): - """Base class for functions which represent some data's baseline term. - - Class members - ------------- - parameterdict: A dictionary mapping string keys to their index in the - sequence of parameters. These keys apply only to - the default "internal" format. - parformats: A sequence of strings defining what formats are recognized - by a baseline function. - default_formats: A dictionary which maps the strings "default_input" and - "default_output" to strings also appearing in parformats. - "default_input"-> format used internally within the class - "default_output"-> Default format to use when converting - parameters for outside use. - - Class methods (implemented by inheriting classes) - ------------------------------------------------- - estimate_parameters() (optional) - _jacobianraw() (optional, but strongly recommended) - _transform_derivativesraw() (optional, supports propagation of uncertainty for different paramaterizations) - _transform_parametersraw() - _valueraw() - - Class methods - ------------- - actualize() - - Inherited methods - ----------------- - jacobian() - value() - transform_derivatives() - transform_parameters() - """ - - def __init__( - self, - parameterdict, - parformats, - default_formats, - metadict, - base=None, - Cache=None, - ): - """Set parameterdict defined by subclass - - parameterdict: A dictionary mapping string keys to their index in a - sequence of parameters for this BaselineFunction subclass. - parformats: A sequence strings containing all allowed input/output - formats defined for the peak function's parameters. - default_formats: A dictionary mapping the string keys "internal" and - "default_output" to formats from parformats. - metadict: Dictionary mapping string keys to tuple (v, m) where v is an - additional argument required by function, and m is a method - whose string output recreates v when passed to eval(). - base: A basefunction subclass instance which this one decorates with - additional functionality. - Cache: A class (not instance) which implements caching of BaseFunction - evaluations.""" - BaseFunction.__init__( - self, parameterdict, parformats, default_formats, metadict, base, Cache - ) - - #### "Virtual" class methods #### - - #### Methods required by BaseFunction #### - - def actualize( - self, - pars, - in_format="default_input", - free=None, - removable=False, - static_owner=False, - ): - converted = self.transform_parameters(pars, in_format, out_format="internal") - return Baseline(self, converted, free, removable, static_owner) - - def getmodule(self): - return __name__ - - -# end of class BaselineFunction - - -class Baseline(ModelPart): - """Represents a baseline associated with a BaselineFunction subclass.""" - - def __init__(self, owner, pars, free=None, removable=False, static_owner=False): - """Set instance members. - - owner: an instance of a BaselineFunction subclass - pars: Sequence of parameters which define the baseline - free: Sequence of Boolean variables. If False, the corresponding - parameter will not be changed. 
- removable: (False) Boolean determines whether the baseline can be removed. - static_owner: (False) Whether or not the owner can be changed with - changeowner() - - Note that free and removable are not mutually exclusive. If any - values are not free but removable=True then the entire baseline may be - may be removed during peak extraction, but the held parameters for the - baseline will remain unchanged until that point. - """ - ModelPart.__init__(self, owner, pars, free, removable, static_owner) - - @staticmethod - def factory(baselinestr, ownerlist): - """Instantiate a Peak from a string. - - Parameters: - baselinestr: string representing Baseline - ownerlist: List of BaseFunctions that owner is in - """ - from numpy import array - - data = baselinestr.strip().splitlines() - - # dictionary of parameters - pdict = {} - for d in data: - l = d.split("=", 1) - if len(l) == 2: - try: - pdict[l[0]] = eval(l[1]) - except Exception: - emsg = "Invalid parameter: %s" % d - raise SrMiseDataFormatError(emsg) - else: - emsg = "Invalid parameter: %s" % d - raise SrMiseDataFormatError(emsg) - - # Correctly initialize the base function, if one exists. - idx = pdict["owner"] - if idx > len(ownerlist): - emsg = "Dependent base function not in ownerlist." - raise ValueError(emsg) - pdict["owner"] = ownerlist[idx] - - return Baseline(**pdict) - - -# End of class Baseline - -# simple test code -if __name__ == "__main__": - - import matplotlib.pyplot as plt - from numpy.random import randn - - from diffpy.srmise.modelcluster import ModelCluster - from diffpy.srmise.modelevaluators import AICc - from diffpy.srmise.peaks import GaussianOverR - - res = 0.01 - r = np.arange(2, 4, res) - err = np.ones(len(r)) # default unknown errors - pf = GaussianOverR(0.7) - evaluator = AICc() - - pars = [[3, 0.2, 10], [3.5, 0.2, 10]] - ideal_peaks = Peaks([pf.createpeak(p, "pwa") for p in pars]) - y = ideal_peaks.value(r) + 0.1 * randn(len(r)) diff --git a/diffpy/srmise/baselines/fromsequence.py b/diffpy/srmise/baselines/fromsequence.py deleted file mode 100644 index c06e2de..0000000 --- a/diffpy/srmise/baselines/fromsequence.py +++ /dev/null @@ -1,207 +0,0 @@ -#!/usr/bin/env python -############################################################################## -# -# SrMise by Luke Granlund -# (c) 2014 trustees of the Michigan State University. -# All rights reserved. -# -# File coded by: Luke Granlund -# -# See LICENSE.txt for license information. -# -############################################################################## - -import logging - -import matplotlib.pyplot as plt -import numpy as np -import scipy.interpolate as spi - -import diffpy.srmise.srmiselog -from diffpy.srmise.baselines.base import BaselineFunction - -logger = logging.getLogger("diffpy.srmise") - -class FromSequence (BaselineFunction): - """Methods for evaluation of a baseline from discrete data via interpolation. - - FromSequence uses cubic spline interpolation (no smoothing) on discrete - points to approximate the baseline at arbitrary points within the - interpolation domain. This baseline function permits no free parameters.""" - - def __init__(self, *args, **kwds): - """Initialize baseline corresponding to sequences x and y. - - Usage: - FromSequence(xlist, ylist) or - FromSequence(x=xlist, y=ylist) - - FromSequence("filename") or - FromSequence(file="filename") - - - Parameters/Keywords - x: Sequence of x values defining baseline. - y: Sequence of y values defining baseline. 
- or - file: Name of file with column of x values and column of y values. - """ - if len(args)==1 and len(kwds)==0: - # load from file - x, y = self.readxy(args[0]) - elif len(args) == 0 and ("file" in kwds and "x" not in kwds and "y" not in kwds): - # load file - x, y = self.readxy(kwds["file"]) - elif len(args)==2 and len(kwds)==0: - # Load x, y directly from arguments - x = args[0] - y = args[1] - elif len(args) == 0 and ("x" in kwds and "y" in kwds and "file" not in kwds): - # Load x, y from keywords - x = kwds["x"] - y = kwds["y"] - else: - emsg = "Call to FromSequence does not match any allowed signature." - raise TypeError(emsg) - - # Guarantee valid lengths - if len(x) != len(y): - emsg = "Sequences x and y must have the same length." - raise ValueError(emsg) - parameterdict = {} - formats = ['internal'] - default_formats = {'default_input':'internal', 'default_output':'internal'} - self.spline = spi.InterpolatedUnivariateSpline(x, y) - self.minx = x[0] - self.maxx = x[-1] - metadict = {} - metadict["x"] = (x, self.xyrepr) - metadict["y"] = (y, self.xyrepr) - BaselineFunction.__init__(self, parameterdict, formats, default_formats, metadict, None, Cache=None) - - #### Methods required by BaselineFunction #### - - def estimate_parameters(self, r, y): - """Return empty numpy array. - - A FromSequence object has no free parameters, so there is nothing - to estimate. - - Parameters - r: (Numpy array) Data along r from which to estimate, Ignored - y: (Numpy array) Data along y from which to estimate, Ignored""" - return np.array([]) - - def _jacobianraw(self, pars, r, free): - """Return []. - - A FromSequence baseline has no parameters. - - Parameters - pars: Empty sequence - r: sequence or scalar over which pars is evaluated - free: Empty sequence.""" - if len(pars) != self.npars: - emsg = "Argument pars must have "+str(self.npars)+" elements." - raise ValueError(emsg) - if len(free) != self.npars: - emsg = "Argument free must have "+str(self.npars)+" elements." - raise ValueError(emsg) - return [] - - def _transform_parametersraw(self, pars, in_format, out_format): - """Convert parameter values from in_format to out_format. - - Parameters - pars: Sequence of parameters - in_format: A format defined for this class - out_format: A format defined for this class - - Defined Formats - n/a, FromSequence has no parameters""" - temp = np.array(pars) - - # Convert to intermediate format "internal" - if in_format == "internal": - pass - else: - raise ValueError("Argument 'in_format' must be one of %s." \ - % self.parformats) - - # Convert to specified output format from "internal" format. - if out_format == "internal": - pass - else: - raise ValueError("Argument 'out_format' must be one of %s." \ - % self.parformats) - return temp - - def _valueraw(self, pars, r): - """Return value of polynomial for the given parameters and r values. - - Parameters - pars: Empty sequence - r: sequence or scalar over which pars is evaluated""" - if len(pars) != self.npars: - emsg = "Argument pars must have "+str(self.npars)+" elements." 
- raise ValueError(emsg) - try: - if r[0] < self.minx or r[-1] > self.maxx: - logger.warn("Warning: Evaluating interpolating function over %s, outside safe range of %s.", - [r[0], r[-1]], - [self.minx, self.maxx]) - except IndexError, TypeError: - if r < self.minx or r > self.maxx: - logger.warn("Warning: Evaluating interpolating function at %s, outside safe range of %s.", - r, - [self.minx, self.maxx]) - return self.spline(r) - - def getmodule(self): - return __name__ - - def xyrepr(self, var): - """Safe string output of x and y, compatible with eval()""" - return "[%s]" %", ".join([repr(v) for v in var]) - - def readxy(self, filename): - """ """ - from diffpy.srmise.srmiseerrors import SrMiseDataFormatError, SrMiseFileError - - # TODO: Make this safer - try: - datastring = open(filename,'rb').read() - except Exception, err: - raise err - - import re - res = re.search(r'^[^#]', datastring, re.M) - if res: - datastring = datastring[res.end():].strip() - - x=[] - y=[] - - try: - for line in datastring.split("\n"): - v = line.split() - x.append(float(v[0])) - y.append(float(v[1])) - except (ValueError, IndexError), err: - raise SrMiseDataFormatError(str(err)) - - return (np.array(x), np.array(y)) - -#end of class FromSequence - -# simple test code -if __name__ == '__main__': - - r = np.arange(0, 9.42413, .2) - b = -(np.tanh(.5*r) + np.sin(.5*r)) - f = FromSequence(r, b) - pars = np.array([]) - free = np.array([]) - - r2 = np.arange(0, 9.42413, .5) - b2 = f._valueraw(pars, r2) diff --git a/diffpy/srmise/baselines/nanospherical.py b/diffpy/srmise/baselines/nanospherical.py deleted file mode 100644 index 04c1d43..0000000 --- a/diffpy/srmise/baselines/nanospherical.py +++ /dev/null @@ -1,246 +0,0 @@ -#!/usr/bin/env python -############################################################################## -# -# SrMise by Luke Granlund -# (c) 2014 trustees of the Michigan State University. -# All rights reserved. -# -# File coded by: Luke Granlund -# -# See LICENSE.txt for license information. -# -############################################################################## - -import logging - -import matplotlib.pyplot as plt -import numpy as np - -import diffpy.srmise.srmiselog -from diffpy.srmise.baselines.base import BaselineFunction -from diffpy.srmise.srmiseerrors import SrMiseEstimationError - -logger = logging.getLogger("diffpy.srmise") - -class NanoSpherical (BaselineFunction): - """Methods for evaluation of baseline of spherical nanoparticle of uniform density. - - Allowed formats are - internal: [scale, radius] - - Given nanoparticle radius R, the baseline is -scale*r*(1-(3r)/(4R)+(r^3)/(16*R^3)) in the - interval (0, abs(R)), and 0 elsewhere. Internally, both scale and radius are unconstrained, - but negative values are mapped to their physically meaningful positive equivalents. - - The expression in parentheses is gamma_0(r) for a sphere. For a well normalized PDF the - scale factor is 4*pi*rho_0, where rho_r is the nanoparticle density. - - gamma_0(r) Reference: - Guinier et. al. (1955). Small-angle Scattering from X-rays. New York: John Wiley & Sons, Inc. - """ - - def __init__(self, Cache=None): - """Initialize a spherical nanoparticle baseline. - - Parameters - Cache - A class (not instance) which implements caching of BaseFunction - evaluations. 
- """ - # Define parameterdict - parameterdict = {'scale':0, 'radius':1} - formats = ['internal'] - default_formats = {'default_input':'internal', 'default_output':'internal'} - metadict = {} - BaselineFunction.__init__(self, parameterdict, formats, default_formats, metadict, None, Cache) - - #### Methods required by BaselineFunction #### - -# def estimate_parameters(self, r, y): -# """Estimate parameters for spherical baseline. (Not implemented!) -# -# Parameters -# r - array along r from which to estimate -# y - array along y from which to estimate -# -# Returns Numpy array of parameters in the default internal format. -# Raises NotImplementedError if estimation is not implemented for this -# degree, or SrMiseEstimationError if parameters cannot be estimated for -# any other reason. -# """ -# if len(r) != len(y): -# emsg = "Arrays r, y must have equal length." -# raise ValueError(emsg) - - def _jacobianraw(self, pars, r, free): - """Return the Jacobian of the spherical baseline. - - Parameters - pars - Sequence of parameters for a spherical baseline - pars[0] = scale - pars[1] = radius - r - sequence or scalar over which pars is evaluated. - free - sequence of booleans which determines which derivatives are - needed. True for evaluation, False for no evaluation. - """ - if len(pars) != self.npars: - emsg = "Argument pars must have "+str(self.npars)+" elements." - raise ValueError(emsg) - if len(free) != self.npars: - emsg = "Argument free must have "+str(self.npars)+" elements." - raise ValueError(emsg) - jacobian = [None for p in range(self.npars)] - if (free == False).sum() == self.npars: - return jacobian - - if np.isscalar(r): - if r <= 0. or r >= 2.*pars[1]: - if free[0]: jacobian[0] = 0. - if free[1]: jacobian[1] = 0. - else: - if free[0]: jacobian[0] = self._jacobianrawscale(pars, r) - if free[1]: jacobian[1] = self._jacobianrawradius(pars, r) - else: - s = self._getdomain(pars, r) - if free[0]: - jacobian[0] = np.zeros(len(r)) - jacobian[0][s] = self._jacobianrawscale(pars, r[s]) - if free[1]: - jacobian[1] = np.zeros(len(r)) - jacobian[1][s] = self._jacobianrawradius(pars, r[s]) - return jacobian - - def _jacobianrawscale(self, pars, r): - """Return partial Jacobian wrt scale without bounds checking. - - Parameters - pars - Sequence of parameters for a spherical baseline - pars[0] = scale - pars[1] = radius - r - sequence or scalar over which pars is evaluated. - """ - s = np.abs(pars[0]) - R = np.abs(pars[1]) - rdivR = r/R - # From abs'(s) in derivative, which is equivalent to sign(s) except at 0 where it - # is undefined. Since s=0 is equivalent to the absence of a nanoparticle, sign will - # be fine. - sign = np.sign(pars[1]) - return -sign*r*(1-(3./4.)*rdivR+(1./16.)*rdivR**3) - - def _jacobianrawradius(self, pars, r): - """Return partial Jacobian wrt radius without bounds checking. - - Parameters - pars - Sequence of parameters for a spherical baseline - pars[0] = scale - pars[1] = radius - r - sequence or scalar over which pars is evaluated. - """ - s = np.abs(pars[0]) - R = np.abs(pars[1]) - # From abs'(R) in derivative, which is equivalent to sign(R) except at 0 where it - # is undefined. Since R=0 is a singularity anyway, sign will be fine. - sign = np.sign(pars[1]) - return sign*s*(3*r**2*(r**2-4*R**2))/(16*R**4) - - def _transform_parametersraw(self, pars, in_format, out_format): - """Convert parameter values from in_format to out_format. 
- - Parameters - pars - Sequence of parameters - in_format - A format defined for this class - out_format - A format defined for this class - - Defined Formats - internal - [scale, radius] - """ - temp = np.array(pars) - - # Convert to intermediate format "internal" - if in_format == "internal": - # Map both scale and radius to their positive equivalents - temp[0] = np.abs(temp[0]) - temp[1] = np.abs(temp[1]) - else: - raise ValueError("Argument 'in_format' must be one of %s." \ - % self.parformats) - - # Convert to specified output format from "internal" format. - if out_format == "internal": - pass - else: - raise ValueError("Argument 'out_format' must be one of %s." \ - % self.parformats) - return temp - - def _valueraw(self, pars, r): - """Return value of spherical baseline for the given parameters and r values. - - Outside the interval [0, radius] the baseline is 0. - - Parameters - pars - Sequence of parameters for a spherical baseline - pars[0] = scale - pars[1] = radius - r - sequence or scalar over which pars is evaluated. - """ - if len(pars) != self.npars: - emsg = "Argument pars must have "+str(self.npars)+" elements." - raise ValueError(emsg) - if np.isscalar(r): - if r <= 0. or r >= 2.*pars[1]: - return 0. - else: - return self._valueraw2(pars, r) - else: - out = np.zeros(len(r)) - s = self._getdomain(pars, r) - out[s] = self._valueraw2(pars, r[s]) - return out - - def _valueraw2(self, pars, r): - """Return value of spherical baseline without bounds checking for given parameters and r values. - - Parameters - pars - Sequence of parameters for a spherical baseline - pars[0] = scale - pars[1] = radius - r - sequence or scalar over which pars is evaluated. - """ - s = np.abs(pars[0]) - R = np.abs(pars[1]) - rdivR = r/R - return -s*r*(1-(3./4.)*rdivR+(1./16.)*rdivR**3) - - def _getdomain(self, pars, r): - """Return slice object for which r > 0 and r < twice the radius""" - low = r.searchsorted(0., side='right') - high = r.searchsorted(2.*pars[1], side='left') - return slice(low, high) - - def getmodule(self): - return __name__ - -#end of class NanoSpherical - -# simple test code -if __name__ == '__main__': - - f = NanoSpherical() - r = np.arange(-5, 10) - pars = np.array([-1., 7.]) - free = np.array([False, True]) - print "Testing nanoparticle spherical baseline" - print "Scale: %f, Radius: %f" %(pars[0], pars[1]) - print "-----------------------------------------" - val = f._valueraw(pars, r) - jac = f._jacobianraw(pars, r, free) - outjac = [j if j is not None else [None]*len(r) for j in jac] - print "r".center(10), "value".center(10), "jac(scale)".center(10), "jac(radius)".center(10) - for tup in zip(r, val, *outjac): - for t in tup: - if t is None: - print ("%s" %None).ljust(10), - else: - print ("% .3g" %t).ljust(10), - print diff --git a/diffpy/srmise/baselines/polynomial.py b/diffpy/srmise/baselines/polynomial.py deleted file mode 100644 index 4d95c4f..0000000 --- a/diffpy/srmise/baselines/polynomial.py +++ /dev/null @@ -1,221 +0,0 @@ -#!/usr/bin/env python -############################################################################## -# -# SrMise by Luke Granlund -# (c) 2014 trustees of the Michigan State University. -# All rights reserved. -# -# File coded by: Luke Granlund -# -# See LICENSE.txt for license information. 
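# A minimal standalone sketch (helper name illustrative, plain NumPy) of the
# spherical-nanoparticle baseline deleted above: -scale*r*gamma_0(r), which is
# nonzero only on (0, 2R) and vanishes at the particle diameter, matching _getdomain().
import numpy as np

def sphere_baseline(r, scale, R):
    """Evaluate -scale * r * gamma_0(r) for a uniform sphere of radius R."""
    r = np.asarray(r, dtype=float)
    gamma0 = 1 - (3 * r) / (4 * R) + r**3 / (16 * R**3)
    return np.where((r > 0) & (r < 2 * R), -scale * r * gamma0, 0.0)

print(sphere_baseline([0.0, 5.0, 10.0, 15.0], scale=1.0, R=5.0))
# [ 0.     -1.5625  0.      0.    ]   zero at r=0 and for r >= 2R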
-# -############################################################################## - -import logging - -import matplotlib.pyplot as plt -import numpy as np - -import diffpy.srmise.srmiselog -from diffpy.srmise.baselines.base import BaselineFunction -from diffpy.srmise.srmiseerrors import SrMiseEstimationError - -logger = logging.getLogger("diffpy.srmise") - -class Polynomial (BaselineFunction): - """Methods for evaluation and parameter estimation of a polynomial baseline.""" - - def __init__(self, degree, Cache=None): - """Initialize a polynomial function of degree d. - - Parameters - degree: The degree of the polynomial. Any negative value is interpreted - as the polynomial of negative infinite degree. - Cache: A class (not instance) which implements caching of BaseFunction - evaluations. - """ - # Guarantee valid degree - try: - self.degree = int(str(degree)) - except ValueError: - emsg = "Argument degree must be an integer." - raise ValueError(emsg) - if self.degree < 0: - self.degree = -1 # interpreted as negative infinity - # Define parameterdict - # e.g. {"a_0":3, "a_1":2, "a_2":1, "a_3":0} if degree is 3. - parameterdict = {} - for d in range(self.degree+1): - parameterdict["a_"+str(d)] = self.degree - d - formats = ['internal'] - default_formats = {'default_input':'internal', 'default_output':'internal'} - metadict = {} - metadict["degree"] = (degree, repr) - BaselineFunction.__init__(self, parameterdict, formats, default_formats, metadict, None, Cache) - - #### Methods required by BaselineFunction #### - - def estimate_parameters(self, r, y): - """Estimate parameters for polynomial baseline. - - Estimation is currently implemented only for degree < 2. This - very rudimentary method assumes the baseline crosses the origin, and - y=baseline+signal, where signal is primarily positive. - - Parameters - r: (Numpy array) Data along r from which to estimate - y: (Numpy array) Data along y from which to estimate - - Returns Numpy array of parameters in the default internal format. - Raises NotImplementedError if estimation is not implemented for this - degree, or SrMiseEstimationError if parameters cannot be estimated for - any other reason.""" - if self.degree > 1: - emsg = "Polynomial implements estimation for baselines of degree <= 1 only." - raise NotImplementedError(emsg) - if len(r) != len(y): - emsg = "Arrays r, y must have equal length." - raise ValueError(emsg) - - if self.degree == -1: - return np.array([]) - - if self.degree == 0: - return np.array([0.]) - - if self.degree == 1: - # Estimate degree=1 baseline. - # Find best slope for y=slope*r using only the least 10% of all - # points, assuming the non-baseline component of the data largely - # lies above the baseline. - # TODO: Make this more sophisticated. - try: - cut = np.max([len(y)/10, 1]) - cut_idx = y.argsort()[:cut] - - import numpy.linalg as la - A = np.array([r[cut_idx]]).T - slope = la.lstsq(A, y[cut_idx])[0][0] - return np.array([slope, 0.]) - except Exception, e: - emsg = "Error during estimation -- "+str(e) - raise - raise SrMiseEstimationError(emsg) - - def _jacobianraw(self, pars, r, free): - """Return the Jacobian of a polynomial. - - Parameters - pars: Sequence of parameters for a polynomial of degree d - pars[0] = a_degree - pars[1] = a_(degree-1) - ... - pars[d] = a_0 - r: sequence or scalar over which pars is evaluated - free: sequence of booleans which determines which derivatives are - needed. True for evaluation, False for no evaluation. 
- """ - if len(pars) != self.npars: - emsg = "Argument pars must have "+str(self.npars)+" elements." - raise ValueError(emsg) - if len(free) != self.npars: - emsg = "Argument free must have "+str(self.npars)+" elements." - raise ValueError(emsg) - jacobian = [None for p in range(self.npars)] - if (free == False).sum() == self.npars: - return jacobian - - # The partial derivative with respect to the nth coefficient of a - # polynomial is just x^nth. - for idx in range(self.npars): - if free[idx]: - jacobian[idx] = np.power(r, idx) - return jacobian - - def _transform_parametersraw(self, pars, in_format, out_format): - """Convert parameter values from in_format to out_format. - - Parameters - pars: Sequence of parameters - in_format: A format defined for this class - out_format: A format defined for this class - - Defined Formats - internal: [a_degree, a_(degree-1), ..., a_0]""" - temp = np.array(pars) - - # Convert to intermediate format "internal" - if in_format == "internal": - pass - else: - raise ValueError("Argument 'in_format' must be one of %s." \ - % self.parformats) - - # Convert to specified output format from "internal" format. - if out_format == "internal": - pass - else: - raise ValueError("Argument 'out_format' must be one of %s." \ - % self.parformats) - return temp - - def _valueraw(self, pars, r): - """Return value of polynomial for the given parameters and r values. - - Parameters - pars: Sequence of parameters for a polynomial of degree d - pars[0] = a_degree - pars[1] = a_(degree-1) - ... - pars[d] = a_0 - If degree is negative infinity, pars is an empty sequence. - r: sequence or scalar over which pars is evaluated""" - if len(pars) != self.npars: - emsg = "Argument pars must have "+str(self.npars)+" elements." - raise ValueError(emsg) - return np.polyval(pars, r) - - def getmodule(self): - return __name__ - -#end of class Polynomial - -# simple test code -if __name__ == '__main__': - - # Test polynomial of degree 3 - print "Testing degree 3 polynomial" - print "---------------------------" - f = Polynomial(degree = 3) - r = np.arange(5) - pars = np.array([3, 0, 1, 2]) - free = np.array([True, False, True, True]) - val = f._valueraw(pars, r) - jac = f._jacobianraw(pars, r, free) - print "Value:\n", val - print "Jacobian: " - for j in jac: print " %s" %j - - # Test polynomial of degree -oo - print "\nTesting degree -oo polynomial (== 0)" - print "------------------------------------" - f = Polynomial(degree = -1) - r = np.arange(5) - pars = np.array([]) - free = np.array([]) - val = f._valueraw(pars, r) - jac = f._jacobianraw(pars, r, free) - print "Value:\n", val - print "Jacobian: " - for j in jac: print " %s" %j - - # Test linear estimation - print "\nTesting linear baseline estimation" - print "------------------------------------" - f = Polynomial(degree = 1) - pars = np.array([1, 0]) - r = np.arange(0, 10, .1) - y = -r + 10*np.exp(-(r-5)**2) + np.random.rand(len(r)) - est = f.estimate_parameters(r, y) - print "Actual baseline: ", np.array([-1, 0.]) - print "Estimated baseline: ", est diff --git a/diffpy/srmise/modelparts.py b/diffpy/srmise/modelparts.py deleted file mode 100644 index 1cb6412..0000000 --- a/diffpy/srmise/modelparts.py +++ /dev/null @@ -1,575 +0,0 @@ -#!/usr/bin/env python -############################################################################## -# -# SrMise by Luke Granlund -# (c) 2014 trustees of the Michigan State University. -# All rights reserved. -# -# File coded by: Luke Granlund -# -# See LICENSE.txt for license information. 
-#
-##############################################################################
-"""Module for representing instances of mathematical functions.
-
-Classes
--------
-ModelPart: Superclass of Peak and Baseline
-ModelParts: Collection (list) of ModelPart instances.
-"""
-
-import logging
-
-import numpy as np
-from scipy.optimize import leastsq
-
-from diffpy.srmise import srmiselog
-from diffpy.srmise.srmiseerrors import *
-
-logger = logging.getLogger("diffpy.srmise")
-
-import matplotlib.pyplot as plt
-
-# Output of scipy.optimize.leastsq for a single parameter changed in scipy 0.8.0
-# Before it returned a scalar, later it returned an array of length 1.
-import pkg_resources as pr
-
-__spv__ = pr.get_distribution('scipy').version
-__oldleastsqbehavior__ = (pr.parse_version(__spv__) < pr.parse_version('0.8.0'))
-
-
-class ModelParts(list):
-    """A collection of ModelPart instances.
-
-    Methods
-    -------
-    copy: Return deep copy
-    fit: Fit to given data
-    npars: Return total number of parameters
-    pack_freepars: Update free parameters with values in given sequence
-    residual: Return residual of model
-    residual_jacobian: Return jacobian of residual of model
-    transform: Change format of parameters.
-    value: Return value of model
-    unpack_freepars: Return sequence containing value of all free parameters
-    """
-
-    def __init__(self, *args, **kwds):
-        list.__init__(self, *args, **kwds)
-
-    def fit(self, r, y, y_error, range=None, ntrials=0, cov=None, cov_format="default_output"):
-        """Chi-square fit of all free parameters to given data.
-
-        There must be more data points than free parameters.
-        Fitting is performed with the MINPACK leastsq() routine exposed by scipy.
-
-        Parameters
-        r - Sequence of r values over which to fit
-        y - Sequence of y values over which to fit
-        y_error - Sequence of uncertainties in y
-        range - Slice object specifying region of r and y over which to fit.
-                Fits over all the data by default.
-        ntrials - The maximum number of function evaluations while fitting.
-        cov - Optional ModelCovariance object preserves covariance information.
-        cov_format - Parameterization to use in cov.
-        """
-        freepars = self.unpack_freepars()
-        if len(freepars) >= len(r):
-            emsg = "Cannot fit model with " + str(len(freepars)) +\
-                   " free parameters but only " + str(len(r)) + " data points."
-            raise SrMiseFitError(emsg)
-        if len(freepars) == 0:
-            #emsg = "Cannot fit model with no free parameters."
-            #raise SrMiseFitError(emsg)
-            return
-
-        if range == None:
-            range = slice(None)
-
-        args = (r, y, y_error, range)
-
-        if srmiselog.liveplots:
-            plt.figure(1)
-            plt.ioff()
-            plt.subplot(211)
-            plt.cla()
-            plt.title("Before")
-            plt.plot(r, y, label="_nolabel_")
-            plt.plot(r, (y-self.value(r, range=range))-1.1*(max(y) - min(y)), label="_nolabel_")
-            for p in self:
-                plt.plot(r, p.value(r, range=range), label=str(p))
-            plt.ion()
-
-        try:
-            f = leastsq(
-                self.residual,                 # minimize this function
-                freepars,                      # initial parameters
-                args=args,                     # arguments to residual, residual_jacobian
-                Dfun=self.residual_jacobian,   # explicit Jacobian
-                col_deriv=1,                   # order of derivatives in Jacobian
-                full_output=1,
-                maxfev=ntrials)
-        except NotImplementedError:
-            # TODO: Figure out if is worth checking for residual_jacobian
-            # before leastsq().  This exception will either occur almost never
-            # or extremely frequently, and the extra evaluations will add up.
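# A standalone sketch of the leastsq call pattern retried here, with illustrative
# model and data. With col_deriv=1 the Dfun callback returns one row per parameter;
# omitting Dfun (as in the fallback below) makes leastsq difference the residual
# numerically instead.
import numpy as np
from scipy.optimize import leastsq

def residual(p, x, y):
    return y - p[0] * np.exp(-p[1] * x)

def residual_jac(p, x, y):
    # rows correspond to parameters because col_deriv=1
    return np.array([-np.exp(-p[1] * x), p[0] * x * np.exp(-p[1] * x)])

x = np.linspace(0, 4, 50)
y = 2.5 * np.exp(-1.3 * x)
sol, cov_x, info, mesg, ier = leastsq(residual, [1.0, 1.0], args=(x, y),
                                      Dfun=residual_jac, col_deriv=1, full_output=1)
assert ier in (1, 2, 3, 4)                   # same success flags checked by fit()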
- logger.info("One or more functions do not define residual_jacobian().") - f = leastsq( - self.residual, # minimize this function - freepars, # initial parameters - args=args, # arguments to residual - col_deriv=1, # order of derivatives in Jacobian - full_output=1, - maxfev=ntrials) - except Exception: - # Sadly, KeyboardInterrupt, etc. is reraised as minpack.error - # Not much I can do about that, though. - import traceback - emsg = "Unexpected error in modelparts.fit(). Original exception:\n" +\ - traceback.format_exc() + "End original exception." - raise SrMiseFitError(emsg) - - result = f[0] - if __oldleastsqbehavior__ and len(freepars) == 1: - # leastsq returns a scalar when there is only one parameter - result = np.array([result]) - - self.pack_freepars(result) - - if srmiselog.liveplots: - plt.draw() - plt.ioff() - plt.figure(1) - plt.subplot(212) - plt.cla() - plt.title("After") - plt.ion() - plt.plot(r, y, - r, (y-self.value(r, range=range))-1.1*(max(y) - min(y)), - *[i for sublist in [[r, p.value(r, range=range)] for p in self] for i in sublist]) - plt.draw() - - if srmiselog.wait: - print "Press 'Enter' to continue...", - raw_input() - - if f[4] not in (1,2,3,4): - emsg = "Fit did not succeed -- " + str(f[3]) - raise SrMiseFitError(emsg) - - # clean up parameters - for p in self: - p.pars = p.owner().transform_parameters(p.pars, in_format="internal", out_format="internal") - - # Supply estimated covariance matrix if requested. - # The precise relationship between f[1] and estimated covariance matrix is a little unclear from - # the documentation of leastsq. This is the interpretation given by scipy.optimize.curve_fit, - # which is a wrapper around leastsq. - if cov is not None: - pcov = f[1] - fvec = f[2]["fvec"] - dof = len(r) - len(freepars) - cov.setcovariance(self, pcov*np.sum(fvec**2)/dof) - try: - cov.transform(in_format="internal", out_format=cov_format) - except SrMiseUndefinedCovarianceError as e: - logger.warn("Covariance not defined. Fit may not have converged.") - - - return -#### Notes on the fit f -# f[0] = solution -# f[1] = Uses the fjac and ipvt optional outputs to construct an estimate of the jacobian around the solution. -# None if a singular matrix encountered (indicates very flat curvature in some direction). -# This matrix must be multiplied by the residual variance to get the covariance of the parameter -# estimates - see curve fit. -# f[2] = dictionary{nfev: int, fvec: array(), fjac: array(), ipvt: array(), qtf: array()} -# nfev - The number of function calls made -# fvec - function (residual) evaluated at solution -# fjac - "a permutation of the R matrix of a QR factorization of the final Jacobian." -# ipvt - integer array defining a permutation matrix P such that fjac*P=QR -# qtf - transpose(q)*fvec -# f[3] = message about results of fit -# f[4] = integer flag. Fit was successful on 1,2,3, or 4. Otherwise unsuccessful. - - def npars(self, count_fixed=True): - """Return total number of parameters in all parts. - - Parameters - count_fixed - Boolean which determines if fixed parameters are - are included in the count. - """ - n = 0 - for p in self: - n+=p.npars(count_fixed=count_fixed) - return n - - def pack_freepars(self, freepars): - """Update parameters with values from sequence of freepars.""" - if np.isnan(freepars).any(): - emsg = "Non-numeric free parameters." 
- raise ValueError(emsg) - freeidx = 0 - for p in self: - freeidx += p.update(freepars[freeidx:]) - - def residual(self, freepars, r, y_expected, y_error, range=None): - """Calculate residual of all parameters. - - Parameters - freepars - sequence of free parameters - r - the input domain - y_expected - sequence of expected values - y_error - sequence of uncertainties in y-variable - range - Slice object specifying region of r and y over which to fit. - All the data by default. - """ - self.pack_freepars(freepars) - total = self.value(r, range) - try: - if range is None: - range = slice(0, len(r)) - return (y_expected[range]-total[range])/y_error[range] - except TypeError: - return (y_expected-total)/y_error - - def residual_jacobian(self, freepars, r, y_expected, y_error, range=None): - """Calculate the Jacobian of freepars. - - Parameters - freepars - sequence of free parameters - r - the input domain - y_expected - sequence of expected values - y_error - sequence of uncertainties in y-variable - range - Slice object specifying region of r and y over which to fit. - All the data by default. - """ - if len(freepars) == 0: - raise ValueError("Argument freepars has length 0. The Jacobian " - "is only defined with >=1 free parameters.") - - self.pack_freepars(freepars) - tempJac=[] - for p in self: - tempJac[len(tempJac):] = p.jacobian(r, range) - # Since the residual is (expected - calculated) the jacobian - # of the residual has a minus sign. - jac=-np.array([j for j in tempJac if j is not None]) - try: - if range is None: - range = slice(0, len(r)) - return jac[:,range]/y_error[range] - except TypeError: - return jac/y_error - - def value(self, r, range=None): - """Calculate total value of all parts over range. - - Parameters - r - the input domain - range - Slice object specifying region of r and y over which to fit. - All the data by default. - """ - total = r * 0. - for p in self: - total += p.value(r, range) - return total - - def unpack_freepars(self): - """Return array of all free parameters.""" - #To check: ravel() sometimes returns a reference and othertimes a copy. - # Do I need to use flatten() instead? - return np.concatenate([p.compress() for p in self]).ravel() - - def covariance(self, format="internal", **kwds): - """Return estimated covariance matrix of the model. - - The covariance matrix may be given in terms of any parameterization - defined by the formats for each individual ModelPart. - - Parameters - format - The format ("internal" by default) to use for all ModelParts. - This may be overridden for specific peaks as shown below. - - Keywords - f0 - The format of the 0th ModelPart - f1 - The format of the 1st ModelPart - etc. - """ - formats = [format for p in self] - - for k, v in kwds.items(): - try: - idx = int(k[1:]) - except ValueError: - emsg = "Invalid format keyword '%s'. They must be specified as 'f0', 'f1', etc." %k - raise ValueError(emsg) - - formats[int(k[1:])] = v - - - - return - - - def copy(self): - """Return deep copy of this ModelParts. 
- - The original and the copy are completely independent, except each - ModelPart and its copy still reference the same owner.""" - return type(self).__call__([p.copy() for p in self]) - - def __str__(self): - """Return string representation of this ModelParts.""" - return ''.join([str(p)+"\n" for p in self]) - - def __getslice__(self, i, j): - """Extends list.__getslice__""" - return self.__class__(list.__getslice__(self, i, j)) - - def transform(self, in_format="internal", out_format="internal"): - """Transforms format of parameters in this modelpart. - - Parameters - in_format - The format the parameters are already in. - out_format - The format the parameters are transformed to. - """ - for p in self: - try: - p.pars = p.owner().transform_parameters(p.pars, in_format, out_format) - except ValueError: - logger.info("Invalid parameter transformation: Ignoring %s->%s for function of type %s." %(in_format, out_format, p.owner().getmodule())) - -# End of class ModelParts - -class ModelPart(object): - """Represents a single part (instance of some function) of a model. - - Members - ------- - pars - Array containing the parameters of this model part - free - Array containing boolean values defining whether the corresponding parameter - is free or not. - removable - Boolean determining whether or not this model part can be - removed during extraction. - static_owner - Boolean determines if owner can be changed with changeowner() - - Methods - ------- - changeowner - Change the owner of self - copy - Return deep copy of self - compress - Return parameters with non-free parameters removed - jacobian - Return jacobian - getfree - Return free parameter by index or keyword define by owner - npars - Return number of parameters in self - owner - Return self.owner - setfree - Set a free parameter by index or keyword defined by owner - update - Update free parameters with values in given sequence - value - Return value - writestr - Return string representation of self - """ - - def __init__(self, owner, pars, free=None, removable=True, static_owner=False): - """Set instance members. - - Parameters - owner - an instance of a BaseFunction subclass - pars - Sequence of parameters which specify the function explicitly - free - Sequence of Boolean variables. If False, the corresponding - parameter will not be changed. - removable - Boolean determines whether this part can be removed. - static_owner - Whether or not the part can be changed with - changeowner() - - Note that free and removable are not mutually exclusive. If any - pars are not free but removable=True then the part may be removed, but - the held parameters for this part will remain unchanged until then. - """ - self._owner = owner - - if len(pars) != owner.npars: - emsg = "The length of pars must equal the number of parameters "+\ - "specified by the model part owner." - raise ValueError(emsg) - self.pars = np.array(pars[:]) # pars[:] in case pars is a ModelPart - - if free is None: - self.free = np.array([True for p in pars], dtype=bool) - else: - self.free = np.array(free, dtype=bool) - if len(self.free) != owner.npars: - emsg = "The length of free must be equal to the number of "+\ - "parameters specified by the model part owner." - raise ValueError(emsg) - - self.removable = removable - self.static_owner = static_owner - - def changeowner(self, owner): - """Change the owner of this part. - - Does not change the parameters associated with this model part. 
Raises - SrMiseStaticOwnerError if this peak has been declared to have a static - owner, or if the number of parameters is incompatible. - - Parameters - owner - an instance of a BaseFunction subclass - """ - if self.static_owner and self._owner is not owner: - emsg = "Cannot change owner if static_owner is True." - raise SrMiseStaticOwnerError(emsg) - if self._owner.npars != owner.npars: - emsg = "New owner specifies different number of parameters than "+\ - "original owner." - raise SrMiseStaticOwnerError(emsg) - self._owner = owner - - def compress(self): - """Return part parameters with non-free values removed.""" - return self.pars[self.free] - - def jacobian(self, r, range=None): - """Return jacobian of this part over r. - - Parameters - r - the input domain - range - Slice object specifying region of r and y over which to fit. - All the data by default. - """ - return self._owner.jacobian(self, r, range) - - def owner(self): - """Return the BaseFunction subclass instance which owns this part.""" - return self._owner - - def update(self, freepars): - """Sequentially update free parameters from freepars. - - Parameters - freepars - sequence of new parameter values. May contain more - parameters than can actually be updated. - - Return number of parameters updated from freepars. - """ - numfree = self.npars(count_fixed=False) - if len(freepars) < numfree: - pass # raise "freepars does not have enough elements to - # update every unheld parameter." - # TODO: Check if I need to make copies here, or if references - # to parameters are safe. - self.pars[self.free] = freepars[:numfree] - return numfree - - def value(self, r, range=None): - """Return value of peak over r. - - Parameters - r - the input domain - range - Slice object specifying region of r and y over which to fit. - All the data by default. - """ - return self._owner.value(self, r, range) - - def copy(self): - """Return a deep copy of this ModelPart. - - The original and the copy are completely independent, except they both - reference the same owner.""" - return type(self).__call__(self._owner, self.pars, self.free, self.removable, self.static_owner) - - def __getitem__(self, key_or_idx): - """Return parameter of peak corresponding with key_or_idx. - - Parameters - key_or_idx - An integer index, slice, or key from owner's parameter - dictionary. - """ - if key_or_idx in self._owner.parameterdict: - return self.pars[self._owner.parameterdict[key_or_idx]] - else: - return self.pars[key_or_idx] - - def getfree(self, key_or_idx): - """Return value of free corresponding with key_or_idx. - - Parameters - key_or_idx - An integer index, slice, or key from owner's parameter - dictionary.""" - if key_or_idx in self._owner.parameterdict: - return self.free[self._owner.parameterdict[key_or_idx]] - else: - return self.free[key_or_idx] - - def setfree(self, key_or_idx, value): - """Set value of free corresponding with key_or_idx. - - Parameters - key_or_idx - An integer index, slice, or key from owner's parameter - dictionary. - value: A boolean""" - if key_or_idx in self._owner.parameterdict: - self.free[self._owner.parameterdict[key_or_idx]] = value - else: - self.free[key_or_idx] = value - - def __len__(self): - """Return number of parameters, including any fixed ones.""" - return self._owner.npars - - def npars(self, count_fixed=True): - """Return total number of parameters in all parts. 
- - Parameters - count_fixed - Boolean which determines if fixed parameters are - are included in the count.""" - if count_fixed: - return self._owner.npars - else: - return (self.free == True).sum() - - def __str__(self): - """Return string representation of ModelPart parameters.""" - return str(self._owner.transform_parameters(self.pars, in_format="internal", out_format="default_output")) - - def __eq__(self, other): - """ """ - if hasattr(other, "_owner"): - return ((self._owner is other._owner) and - np.all(self.pars == other.pars) and - np.all(self.free == other.free) and - self.removable == other.removable) - else: - return False - - def __ne__(self, other): - """ """ - return not self == other - - def writestr(self, ownerlist): - """Return string representation of ModelPart. - - The value of owner is determined by its index in ownerlist. - - Parameters - ownerlist - List of owner functions - """ - if self._owner not in ownerlist: - emsg = "ownerlist does not contain this ModelPart's owner." - raise ValueError(emsg) - lines = [] - lines.append("owner=%s" %repr(ownerlist.index(self._owner))) - - #Lists/numpy arrays don't give full representation of long lists - lines.append("pars=[%s]" %", ".join([repr(p) for p in self.pars])) - lines.append("free=[%s]" %", ".join([repr(f) for f in self.free])) - lines.append("removable=%s" %repr(self.removable)) - lines.append("static_owner=%s" %repr(self.static_owner)) - datastring = "\n".join(lines)+"\n" - return datastring - -# End of class ModelPart - -# simple test code -if __name__ == '__main__': - - pass diff --git a/diffpy/srmise/peaks/gaussian.py b/diffpy/srmise/peaks/gaussian.py deleted file mode 100644 index 3eae227..0000000 --- a/diffpy/srmise/peaks/gaussian.py +++ /dev/null @@ -1,350 +0,0 @@ -#!/usr/bin/env python -############################################################################## -# -# SrMise by Luke Granlund -# (c) 2014 trustees of the Michigan State University. -# All rights reserved. -# -# File coded by: Luke Granlund -# -# See LICENSE.txt for license information. -# -############################################################################## - -import logging - -import matplotlib.pyplot as plt -import numpy as np - -import diffpy.srmise.srmiselog -from diffpy.srmise.peaks.base import PeakFunction -from diffpy.srmise.srmiseerrors import SrMiseEstimationError, SrMiseScalingError, SrMiseTransformationError - -logger = logging.getLogger("diffpy.srmise") - -class Gaussian (PeakFunction): - """Methods for evaluation and parameter estimation of width-limited Gaussian. - - Allowed formats are - internal: [position, parameterized width-squared, area] - pwa: [position, full width at half maximum, area] - mu_sigma_area: [mu, sigma, area] - - The internal parameterization is unconstrained, but are interpreted - so that the width is between 0 and a user-provided maximum full width - at half maximum, and the area is positive. - - Note that all full width at half maximum values are for the - corresponding Gaussian. - """ - - # Possibly implement cutoff later, but low priority. - # cutoff=3/np.sqrt(2*np.log(2)) - # cutoff defines a distance = maxwidth*cutoff from the maximum beyond - # which the function is considered 0. By default this distance is - # equivalent to 3 standard deviations. 
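# A hedged sketch of the width parameterization described in the docstring above:
# an unconstrained internal parameter p maps to a FWHM inside [0, maxwidth] via
# fwhm = maxwidth*sqrt((sin(p)+1)/2), the same mapping used by the "pwa" conversion
# in _transform_parametersraw below.
import numpy as np

maxwidth = 0.7
for p in (-10.0, -1.0, 0.0, 2.0, 50.0):
    fwhm = maxwidth * np.sqrt(0.5 * (np.sin(p) + 1.0))
    assert 0.0 <= fwhm <= maxwidth           # every real p gives an admissible width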
-    def __init__(self, maxwidth, Cache=None):
-        """maxwidth defined as full width at half maximum for the
-        corresponding Gaussian, which is physically relevant."""
-        parameterdict={'position':0,'width':1,'area':2}
-        formats=['internal','pwa','mu_sigma_area']
-        default_formats={'default_input':'internal', 'default_output':'pwa'}
-        metadict = {}
-        metadict["maxwidth"] = (maxwidth, repr)
-        PeakFunction.__init__(self, parameterdict, formats, default_formats, metadict, None, Cache)
-
-        if maxwidth <= 0:
-            emsg = "'maxwidth' must be greater than 0."
-            raise ValueError(emsg)
-        self.maxwidth = maxwidth
-
-        ### Useful constants ###
-        #c1 and c2 help with function values
-        self.c1 = self.maxwidth*np.sqrt(np.pi/(8*np.log(2)))
-        self.c2 = self.maxwidth**2/(8*np.log(2))
-
-        #c3 and c4 help with parameter estimation
-        self.c3 = .5*np.sqrt(np.pi/np.log(2))
-        self.c4 = np.pi/(self.maxwidth*2)
-
-        #convert sigma to fwhm: fwhm = 2 sqrt(2 log 2) sigma
-        self.sigma2fwhm = 2*np.sqrt(2*np.log(2))
-
-        return
-
-    #### Methods required by PeakFunction ####
-
-    def estimate_parameters(self, r, y):
-        """Estimate parameters for single peak from data provided.
-
-        Parameters
-        r: (Numpy array) Data along r from which to estimate
-        y: (Numpy array) Data along y from which to estimate
-
-        Returns Numpy array of parameters in the default internal format.
-        Raises SrMiseEstimationError if parameters cannot be estimated for any
-        reason."""
-        if len(r) != len(y):
-            emsg = "Arrays r, y must have equal length."
-            raise SrMiseEstimationError(emsg)
-
-        logger.debug("Estimate peak using %s point(s)", len(r))
-
-        minpoints_required = 3
-
-        # filter out negative points
-        usable_idx = [i for i in range(len(y)) if y[i] > 0]
-        use_r = r[usable_idx]
-        use_y = y[usable_idx]
-
-        if len(usable_idx) < minpoints_required:
-            emsg = "Not enough data for successful estimation."
-            raise SrMiseEstimationError(emsg)
-
-        #### Estimation ####
-        guesspars = np.array([0., 0., 0.], dtype=float)
-        min_y = use_y.min()
-        max_y = use_y.max()
-        center = use_r[use_y.argmax()]
-
-        if min_y != max_y:
-            weights = (use_y-min_y)**2
-            guesspars[0] = np.sum(use_r*weights)/sum(weights)
-            # guesspars[0] = center
-            if use_y[0] < max_y:
-                sigma_left = np.sqrt(-.5*(use_r[0]-guesspars[0])**2/np.log(use_y[0]/max_y))
-            else:
-                sigma_left = np.sqrt(-.5*np.mean(np.abs(np.array([use_r[0]-guesspars[0], use_r[-1]-guesspars[0]])))**2/np.log(min_y/max_y))
-            if use_y[-1] < max_y:
-                sigma_right = np.sqrt(-.5*(use_r[-1]-guesspars[0])**2/np.log(use_y[-1]/max_y))
-            else:
-                sigma_right = np.sqrt(-.5*np.mean(np.abs(np.array([use_r[0]-guesspars[0], use_r[-1]-guesspars[0]])))**2/np.log(min_y/max_y))
-            guesspars[1] = .5*(sigma_left+sigma_right)*self.sigma2fwhm # combined sigma estimate, converted to fwhm
-
-        if guesspars[1] > self.maxwidth:
-            #account for width-limit
-            guesspars[2] = self.c3*max_y*self.maxwidth
-            guesspars[1] = np.pi/2 #parameterized in terms of sin
-        else:
-            guesspars[2] = self.c3*max_y*guesspars[1]
-            guesspars[1] = np.arcsin(2*guesspars[1]**2/self.maxwidth**2-1.) #parameterized in terms of sin
-
-        return guesspars
-
-    def scale_at(self, pars, x, scale):
-        """Change parameters so value(x)->scale*value(x).
-
-        Does not change position or height of peak's maxima.  Raises
-        SrMiseScalingError if the parameters cannot be scaled.
-
-        Parameters
-        pars: (Array) Parameters corresponding to a single peak
-        x: (float) Position of the border
-        scale: (float > 0) Size of scaling at x."""
-        if scale <= 0:
-            emsg = ''.join(["Cannot scale by ", str(scale), "."])
-            raise SrMiseScalingError(emsg)
-
-        if scale == 1:
-            return pars
-        else:
-            ratio = 1/scale # Ugly: Equations orig. solved in terms of ratio
-
-        tpars = self.transform_parameters(pars, in_format="internal", out_format="mu_sigma_area")
-
-        #solves 1. f(rmax;mu1,sigma1,area1)=f(rmax;mu2,sigma2,area2)
-        #       2. f(x;mu1,sigma1,area1)=ratio*f(x;mu1,sigma2,area2)
-        #       3.
mu1=mu2=rmax (the maximum of a Gaussian occurs at r=mu) - # for mu2, sigma2, area2 (with appropriate unit conversions to fwhm at the end). - # The expression for rmax is the appropriate solution to df/dr=0 - mu1, sigma1, area1 = tpars - - # the semi-nasty algebra reduces to something nice - mu2 = mu1 - area2 = np.sqrt(area1**2/(2*np.log(ratio)*sigma1**2/(x-mu1)**2+1)) - sigma2 = sigma1*area2/area1 - - tpars[0] = mu2 - tpars[1] = sigma2 - tpars[2] = area2 - try: - tpars = self.transform_parameters(tpars, in_format="mu_sigma_area", out_format="internal") - except SrMiseTransformationError, err: - raise SrMiseScalingError(str(err)) - return tpars - - def _jacobianraw(self, pars, r, free): - """Return Jacobian of width-limited Gaussian. - - pars: Sequence of parameters for a single width-limited Gaussian - pars[0]=peak position - pars[1]=effective width, up to fwhm=maxwidth as par[1] -> inf. - =tan(pi/2*fwhm/maxwidth) - pars[2]=multiplicative constant a, equivalent to peak area - r: sequence or scalar over which pars is evaluated - free: sequence of booleans which determines which derivatives are - needed. True for evaluation, False for no evaluation. - """ - jacobian=[None, None, None] - if (free == False).sum() == self.npars: - return jacobian - - #Optimization - sin_p = np.sin(pars[1]) + 1. - p0minusr = pars[0]-r - exp_p = np.exp(-(p0minusr)**2/(self.c2*sin_p))/(self.c1*np.sqrt(sin_p)) - - if free[0]: - #derivative with respect to peak position - jacobian[0] = -2.*exp_p*p0minusr*np.abs(pars[2])/(self.c2*sin_p) - if free[1]: - #derivative with respect to reparameterized peak width - jacobian[1] = -exp_p*np.abs(pars[2])*np.cos(pars[1])*(self.c2*sin_p-2*p0minusr**2)/(2.*self.c2*sin_p**2) - if free[2]: - #derivative with respect to peak area - #abs'(x)=sign(x) for real x except at 0 where it is undetermined. Since any real peak necessarily has - #non-zero area and the function is paramaterized such that values of either sign represent equivalent - #curves I arbitrarily choose positive sign for pars[2]==0 in order to push the system back into a realistic - #parameter space should this improbable scenario occur. - # jacobian[2] = sign(pars[2])*exp_p - if pars[2] >= 0: - jacobian[2] = exp_p - else: - jacobian[2] = -exp_p - return jacobian - - def _transform_parametersraw(self, pars, in_format, out_format): - """Convert parameter values from in_format to out_format. - - Also restores parameters to a preferred range if it permits multiple - values that correspond to the same physical result. - - Parameters - pars: Sequence of parameters - in_format: A format defined for this class - out_format: A format defined for this class - - Defined Formats - internal: [position, parameterized width-squared, area] - pwa: [position, full width at half maximum, area] - mu_sigma_area: [mu, sigma, area] - """ - temp = np.array(pars) - - # Do I need to change anything? The internal parameters may need to be - # placed into the preferred range, even though their interpretation does - # not change. 
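# Numerical check (standalone, illustrative) of the range folding performed just
# below: mapping the width parameter into [-pi/2, pi/2] preserves sin(p), and
# therefore the physical width it encodes.
import numpy as np

def fold(p):
    n = np.floor((p + np.pi / 2) / np.pi)
    return p - np.pi * n if np.mod(n, 2) == 0 else np.pi * n - p

for p in (-7.0, 3.0, 12.5):
    assert abs(np.sin(fold(p)) - np.sin(p)) < 1e-12
    assert -np.pi / 2 <= fold(p) <= np.pi / 2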
- if in_format == out_format and in_format != "internal": - return pars - - # Convert to intermediate format "internal" - if in_format == "internal": - # put the parameter for width in the "physical" quadrant [-pi/2,pi/2], - # where .5*(sin(p)+1) covers fwhm = [0, maxwidth] - n = np.floor((temp[1]+np.pi/2)/np.pi) - if np.mod(n, 2) == 0: - temp[1] = temp[1] - np.pi*n - else: - temp[1] = np.pi*n - temp[1] - temp[2] = np.abs(temp[2]) # map negative area to equivalent positive one - elif in_format == "pwa": - if temp[1] > self.maxwidth: - emsg = "Width %s (FWHM) greater than maximum allowed width %s" %(temp[1], self.maxwidth) - raise SrMiseTransformationError(emsg) - temp[1] = np.arcsin(2.*temp[1]**2/self.maxwidth**2-1.) - elif in_format == "mu_sigma_area": - fwhm = temp[1]*self.sigma2fwhm - if fwhm > self.maxwidth: - emsg = "Width %s (FWHM) greater than maximum allowed width %s" %(fwhm, self.maxwidth) - raise SrMiseTransformationError(emsg) - temp[1] = np.arcsin(2.*fwhm**2/self.maxwidth**2-1.) - else: - raise ValueError("Argument 'in_format' must be one of %s." \ - % self.parformats) - - # Convert to specified output format from "internal" format. - if out_format == "internal": - pass - elif out_format == "pwa": - temp[1] = np.sqrt(.5*(np.sin(temp[1])+1.)*self.maxwidth**2) - elif out_format == "mu_sigma_area": - temp[1] = np.sqrt(.5*(np.sin(temp[1])+1.)*self.maxwidth**2)/self.sigma2fwhm - else: - raise ValueError("Argument 'out_format' must be one of %s." \ - % self.parformats) - return temp - - def _valueraw(self, pars, r): - """Return value of width-limited Gaussian for the given parameters and r values. - - pars: Sequence of parameters for a single width-limited Gaussian - pars[0]=peak position - pars[1]=effective width, up to fwhm=maxwidth as par[1] -> inf. - =tan(pi/2*fwhm/maxwidth) - pars[2]=multiplicative constant a, equivalent to peak area - r: sequence or scalar over which pars is evaluated - """ - return np.abs(pars[2])/(self.c1*np.sqrt(np.sin(pars[1])+1.))* \ - np.exp(-(r-pars[0])**2/(self.c2*(np.sin(pars[1])+1.))) - - def getmodule(self): - return __name__ - - #### Other methods #### - - def max(self, pars): - """Return position and height of the peak maximum.""" - # TODO: Reconsider this behavior - if len(pars) == 0: - return None - - # Transform parameters for convenience. 
- tpars = self.transform_parameters(pars, in_format="internal", out_format="mu_sigma_area") - - rmax = tpars[0] - ymax = self._valueraw(pars, rmax) - return np.array([rmax, ymax]) - -#end of class Gaussian - -# simple test code -if __name__ == '__main__': - - import matplotlib.pyplot as plt - from numpy.random import randn - - from diffpy.srmise.modelcluster import ModelCluster - from diffpy.srmise.modelevaluators import AICc - from diffpy.srmise.peaks import Peaks - - res = .01 - r = np.arange(2,4,res) - err = np.ones(len(r)) # default unknown errors - pf = Gaussian(.7) - evaluator = AICc() - - pars = [[3, .2, 10], [3.5, .2, 10]] - ideal_peaks = Peaks([pf.createpeak(p, "pwa") for p in pars]) - y = ideal_peaks.value(r) + .1*randn(len(r)) - - guesspars = [[2.7, .15, 5], [3.7, .3, 5]] - guess_peaks = Peaks([pf.createpeak(p, "pwa") for p in guesspars]) - cluster = ModelCluster(guess_peaks, r, y, err, None, AICc, [pf]) - - qual1 = cluster.quality() - print qual1.stat - cluster.fit() - yfit = cluster.calc() - qual2 = cluster.quality() - print qual2.stat - - plt.figure(1) - plt.plot(r, y, r, yfit) - plt.show() diff --git a/diffpy/srmise/peaks/gaussianoverr.py b/diffpy/srmise/peaks/gaussianoverr.py deleted file mode 100644 index f699d9f..0000000 --- a/diffpy/srmise/peaks/gaussianoverr.py +++ /dev/null @@ -1,417 +0,0 @@ -#!/usr/bin/env python -############################################################################## -# -# SrMise by Luke Granlund -# (c) 2014 trustees of the Michigan State University. -# All rights reserved. -# -# File coded by: Luke Granlund -# -# See LICENSE.txt for license information. -# -############################################################################## - -import logging - -import matplotlib.pyplot as plt -import numpy as np - -import diffpy.srmise.srmiselog -from diffpy.srmise.peaks.base import PeakFunction -from diffpy.srmise.srmiseerrors import SrMiseEstimationError, SrMiseScalingError, SrMiseTransformationError - -logger = logging.getLogger("diffpy.srmise") - -class GaussianOverR (PeakFunction): - """Methods for evaluation and parameter estimation of width-limited Gaussian/r. - - Allowed formats are - internal: [position, parameterized width-squared, area] - pwa: [position, full width at half maximum, area] - mu_sigma_area: [mu, sigma, area] - - The internal parameterization is unconstrained, but are interpreted - so that the width is between 0 and a user-provided maximum full width - at half maximum, and the area is positive. - - Note that all full width at half maximum values are for the - corresponding Gaussian. - """ - - # Possibly implement cutoff later, but low priority. - # cutoff=3/np.sqrt(2*np.log(2)) - # cutoff defines a distance = maxwidth*cutoff from the maximum beyond - # which the function is considered 0. By default this distance is - # equivalent to 3 standard deviations. - def __init__(self, maxwidth, Cache=None): - """maxwidth defined as full width at half maximum for the - corresponding Gaussian, which is physically relevant.""" - parameterdict={'position':0,'width':1,'area':2} - formats=['internal','pwa','mu_sigma_area'] - default_formats={'default_input':'internal', 'default_output':'pwa'} - metadict = {} - metadict["maxwidth"] = (maxwidth, repr) - PeakFunction.__init__(self, parameterdict, formats, default_formats, metadict, None, Cache) - - if maxwidth <= 0: - emsg = "'maxwidth' must be greater than 0." 
-            raise ValueError(emsg)
-        self.maxwidth = maxwidth
-
-        ### Useful constants ###
-        #c1 and c2 help with function values
-        self.c1 = self.maxwidth*np.sqrt(np.pi/(8*np.log(2)))
-        self.c2 = self.maxwidth**2/(8*np.log(2))
-
-        #c3 and c4 help with parameter estimation
-        self.c3 = .5*np.sqrt(np.pi/np.log(2))
-        self.c4 = np.pi/(self.maxwidth*2)
-
-        #convert sigma to fwhm: fwhm = 2 sqrt(2 log 2) sigma
-        self.sigma2fwhm = 2*np.sqrt(2*np.log(2))
-
-        return
-
-    #### Methods required by PeakFunction ####
-
-    def estimate_parameters(self, r, y):
-        """Estimate parameters for single peak from data provided.
-
-        Parameters
-        r: (Numpy array) Data along r from which to estimate
-        y: (Numpy array) Data along y from which to estimate
-
-        Returns Numpy array of parameters in the default internal format.
-        Raises SrMiseEstimationError if parameters cannot be estimated for any
-        reason."""
-        if len(r) != len(y):
-            emsg = "Arrays r, y must have equal length."
-            raise SrMiseEstimationError(emsg)
-
-        logger.debug("Estimate peak using %s point(s)", len(r))
-
-        minpoints_required = 3
-
-        # filter out negative points
-        usable_idx = [i for i in range(len(y)) if y[i] > 0]
-        use_r = r[usable_idx]
-        use_y = y[usable_idx]
-
-        if len(usable_idx) < minpoints_required:
-            emsg = "Not enough data for successful estimation."
-            raise SrMiseEstimationError(emsg)
-
-        #### Estimation ####
-        guesspars = np.array([0., 0., 0.], dtype=float)
-        min_y = use_y.min()
-        max_y = use_y.max()
-        center = use_r[use_y.argmax()]
-
-        if min_y != max_y:
-            weights = (use_y-min_y)**2
-            guesspars[0] = np.sum(use_r*weights)/sum(weights)
-            # guesspars[0] = center
-            if use_y[0] < max_y:
-                sigma_left = np.sqrt(-.5*(use_r[0]-guesspars[0])**2/np.log(use_y[0]/max_y))
-            else:
-                sigma_left = np.sqrt(-.5*np.mean(np.abs(np.array([use_r[0]-guesspars[0], use_r[-1]-guesspars[0]])))**2/np.log(min_y/max_y))
-            if use_y[-1] < max_y:
-                sigma_right = np.sqrt(-.5*(use_r[-1]-guesspars[0])**2/np.log(use_y[-1]/max_y))
-            else:
-                sigma_right = np.sqrt(-.5*np.mean(np.abs(np.array([use_r[0]-guesspars[0], use_r[-1]-guesspars[0]])))**2/np.log(min_y/max_y))
-            guesspars[1] = .5*(sigma_left+sigma_right)*self.sigma2fwhm # combined sigma estimate, converted to fwhm
-
-        if guesspars[1] > self.maxwidth:
-            #account for width-limit
-            guesspars[2] = self.c3*max_y*guesspars[0]*self.maxwidth
-            guesspars[1] = np.pi/2 #parameterized in terms of sin
-        else:
-            guesspars[2] = self.c3*max_y*guesspars[0]*guesspars[1]
-            guesspars[1] = np.arcsin(2*guesspars[1]**2/self.maxwidth**2-1.) #parameterized in terms of sin
-
-        return guesspars
-
-    def scale_at(self, pars, x, scale):
-        """Change parameters so value(x)->scale*value(x).
-
-        Does not change position or height of peak's maxima.  Raises
-        SrMiseScalingError if the parameters cannot be scaled.
-
-        Parameters
-        pars: (Array) Parameters corresponding to a single peak
-        x: (float) Position of the border
-        scale: (float > 0) Size of scaling at x."""
-        if scale <= 0:
-            emsg = ''.join(["Cannot scale by ", str(scale), "."])
-            raise SrMiseScalingError(emsg)
-
-        if scale == 1:
-            return pars
-        else:
-            ratio = 1/scale # Ugly: Equations orig. solved in terms of ratio
-
-        tpars = self.transform_parameters(pars, in_format="internal", out_format="mu_sigma_area")
-
-        #solves 1. f(rmax;mu1,sigma1,area1)=f(rmax;mu2,sigma2,area2)
-        #       2. f(x;mu1,sigma1,area1)=ratio*f(x;mu1,sigma2,area2)
-        #       3. 1/2*(mu1+sqrt(mu1^2-4*sigma1^2))=1/2*(mu2+sqrt(mu2^2-4*sigma2^2))=rmax
-        #       for mu2, sigma2, area2 (with appropriate unit conversions to fwhm at the end).
-        #       The expression for rmax is the appropriate solution to df/dr=0
-        mu1, sigma1, area1 = tpars
-
-        # position of the peak maximum
-        try:
-            rmax = self.max(pars)[0]
-        except ValueError, err:
-            raise SrMiseScalingError(str(err))
-
-        # lhs of eqn1/eqn2 multiplied by ratio. Then take the log.
- log_ratio_prime = np.log(ratio)+(x-rmax)*(x-2*mu1+rmax)/(2*sigma1**2) - - # the semi-nasty algebra reduces to something nice - sigma2 = np.sqrt(.5*rmax*(x-rmax)**2/(x-rmax+rmax*log_ratio_prime)) - mu2 = (sigma2**2+rmax**2)/rmax - area2 = area1*(sigma2/sigma1)*np.exp(-(rmax-mu1)**2/(2*sigma1**2))/np.exp(-(rmax-mu2)**2/(2*sigma2**2)) - - tpars[0] = mu2 - tpars[1] = sigma2 - tpars[2] = area2 - try: - tpars = self.transform_parameters(tpars, in_format="mu_sigma_area", out_format="internal") - except SrMiseTransformationError, err: - raise SrMiseScalingError(str(err)) - return tpars - - def _jacobianraw(self, pars, r, free): - """Return Jacobian of width-limited Gaussian/r. - - pars: Sequence of parameters for a single width-limited Gaussian - pars[0]=peak position - pars[1]=effective width, up to fwhm=maxwidth as par[1] -> inf. - =tan(pi/2*fwhm/maxwidth) - pars[2]=multiplicative constant a, equivalent to peak area - r: sequence or scalar over which pars is evaluated - free: sequence of booleans which determines which derivatives are - needed. True for evaluation, False for no evaluation. - """ - jacobian=[None, None, None] - if (free == False).sum() == self.npars: - return jacobian - - #Optimization - sin_p = np.sin(pars[1]) + 1. - p0minusr = pars[0]-r - exp_p = np.exp(-(p0minusr)**2/(self.c2*sin_p))/(np.abs(r)*self.c1*np.sqrt(sin_p)) - - if free[0]: - #derivative with respect to peak position - jacobian[0] = -2.*exp_p*p0minusr*np.abs(pars[2])/(self.c2*sin_p) - if free[1]: - #derivative with respect to reparameterized peak width - jacobian[1] = -exp_p*np.abs(pars[2])*np.cos(pars[1])*(self.c2*sin_p-2*p0minusr**2)/(2.*self.c2*sin_p**2) - if free[2]: - #derivative with respect to peak area - #abs'(x)=sign(x) for real x except at 0 where it is undetermined. Since any real peak necessarily has - #non-zero area and the function is paramaterized such that values of either sign represent equivalent - #curves I arbitrarily choose positive sign for pars[2]==0 in order to push the system back into a realistic - #parameter space should this improbable scenario occur. - # jacobian[2] = sign(pars[2])*exp_p - if pars[2] >= 0: - jacobian[2] = exp_p - else: - jacobian[2] = -exp_p - return jacobian - - def _transform_derivativesraw(self, pars, in_format, out_format): - """Return gradient matrix for the pars converted from in_format to out_format. - - Parameters - pars: Sequence of parameters - in_format: A format defined for this class - out_format: A format defined for this class - - Defined Formats - internal: [position, parameterized width-squared, area] - pwa: [position, full width at half maximum, area] - mu_sigma_area: [mu, sigma, area] - """ - # With these three formats only the width-related parameter changes. - # Therefore the gradient matrix is the identity matrix with the possible - # exception of the element at [1,1]. - g = np.identity(self.npars) - - if in_format == out_format: - return - - if in_format == "internal": - if out_format == "pwa": - g[1,1] = self.maxwidth/(2*np.sqrt(2))*np.cos(pars[1])/np.sqrt(1+np.sin(pars[1])) - elif out_format == "mu_sigma_area": - g[1,1] = self.maxwidth/(2*np.sqrt(2)*self.sigma2fwhm)*np.cos(pars[1])/np.sqrt(1+np.sin(pars[1])) - else: - raise ValueError("Argument 'out_format' must be one of %s." \ - % self.parformats) - elif in_format == "pwa": - if out_format == "internal": - g[1,1] = 2/np.sqrt(self.maxwidth**2-pars[1]**2) - elif out_format == "mu_sigma_area": - g[1,1] = 1/self.sigma2fwhm - else: - raise ValueError("Argument 'out_format' must be one of %s." 
\ - % self.parformats) - elif in_format == "mu_sigma_area": - if out_format == "internal": - g[1,1] = 2*self.sigma2fwhm/np.sqrt(self.maxwidth**2-(self.sigma2fwhm*pars[1])**2) - elif out_format == "pwa": - g[1,1] = self.sigma2fwhm - else: - raise ValueError("Argument 'out_format' must be one of %s." \ - % self.parformats) - else: - raise ValueError("Argument 'in_format' must be one of %s." \ - % self.parformats) - - return g - - def _transform_parametersraw(self, pars, in_format, out_format): - """Convert parameter values from in_format to out_format. - - Also restores parameters to a preferred range if it permits multiple - values that correspond to the same physical result. - - Parameters - pars: Sequence of parameters - in_format: A format defined for this class - out_format: A format defined for this class - - Defined Formats - internal: [position, parameterized width-squared, area] - pwa: [position, full width at half maximum, area] - mu_sigma_area: [mu, sigma, area] - """ - temp = np.array(pars) - - # Do I need to change anything? The internal parameters may need to be - # placed into the preferred range, even though their interpretation does - # not change. - if in_format == out_format and in_format != "internal": - return pars - - # Convert to intermediate format "internal" - if in_format == "internal": - # put the parameter for width in the "physical" quadrant [-pi/2,pi/2], - # where .5*(sin(p)+1) covers fwhm = [0, maxwidth] - n = np.floor((temp[1]+np.pi/2)/np.pi) - if np.mod(n, 2) == 0: - temp[1] = temp[1] - np.pi*n - else: - temp[1] = np.pi*n - temp[1] - temp[2] = np.abs(temp[2]) # map negative area to equivalent positive one - elif in_format == "pwa": - if temp[1] > self.maxwidth: - emsg = "Width %s (FWHM) greater than maximum allowed width %s" %(temp[1], self.maxwidth) - raise SrMiseTransformationError(emsg) - temp[1] = np.arcsin(2.*temp[1]**2/self.maxwidth**2-1.) - elif in_format == "mu_sigma_area": - fwhm = temp[1]*self.sigma2fwhm - if fwhm > self.maxwidth: - emsg = "Width %s (FWHM) greater than maximum allowed width %s" %(fwhm, self.maxwidth) - raise SrMiseTransformationError(emsg) - temp[1] = np.arcsin(2.*fwhm**2/self.maxwidth**2-1.) - else: - raise ValueError("Argument 'in_format' must be one of %s." \ - % self.parformats) - - # Convert to specified output format from "internal" format. - if out_format == "internal": - pass - elif out_format == "pwa": - temp[1] = np.sqrt(.5*(np.sin(temp[1])+1.)*self.maxwidth**2) - elif out_format == "mu_sigma_area": - temp[1] = np.sqrt(.5*(np.sin(temp[1])+1.)*self.maxwidth**2)/self.sigma2fwhm - else: - raise ValueError("Argument 'out_format' must be one of %s." \ - % self.parformats) - return temp - - def _valueraw(self, pars, r): - """Return value of width-limited Gaussian/r for the given parameters and r values. - - pars: Sequence of parameters for a single width-limited Gaussian - pars[0]=peak position - pars[1]=effective width, up to fwhm=maxwidth as par[1] -> inf. - =tan(pi/2*fwhm/maxwidth) - pars[2]=multiplicative constant a, equivalent to peak area - r: sequence or scalar over which pars is evaluated - """ - return np.abs(pars[2])/(np.abs(r)*self.c1*np.sqrt(np.sin(pars[1])+1.))* \ - np.exp(-(r-pars[0])**2/(self.c2*(np.sin(pars[1])+1.))) - - def getmodule(self): - return __name__ - - #### Other methods #### - - def max(self, pars): - """Return position and height of the peak maximum.""" - # TODO: Reconsider this behavior - if len(pars) == 0: - return None - - # Transform parameters for convenience. 
- tpars = self.transform_parameters(pars, in_format="internal", out_format="mu_sigma_area") - - # The Gaussian/r only has a local maximum under this condition. - # Physically realistic peaks will always meet this condition, but - # trying to fit a signal down to r=0 could conceivably lead to issues. - if tpars[0]**2 <= 4*tpars[1]**2: - emsg = ''.join(["No local maximum with parameters\n", str(pars)]) - raise ValueError(emsg) - - rmax = .5*(tpars[0]+np.sqrt(tpars[0]**2-4*tpars[1]**2)) - ymax = self._valueraw(pars, rmax) - return np.array([rmax, ymax]) - -#end of class GaussianOverR - -# simple test code -if __name__ == '__main__': - - import matplotlib.pyplot as plt - from numpy.random import randn - - from diffpy.srmise.modelcluster import ModelCluster - from diffpy.srmise.modelevaluators import AICc - from diffpy.srmise.peaks import Peaks - - res = .01 - r = np.arange(2,4,res) - err = np.ones(len(r)) # default unknown errors - pf = GaussianOverR(.7) - evaluator = AICc() - - pars = [[3, .2, 10], [3.5, .2, 10]] - ideal_peaks = Peaks([pf.createpeak(p, "pwa") for p in pars]) - y = ideal_peaks.value(r) + .1*randn(len(r)) - - guesspars = [[2.7, .15, 5], [3.7, .3, 5]] - guess_peaks = Peaks([pf.createpeak(p, "pwa") for p in guesspars]) - cluster = ModelCluster(guess_peaks, r, y, err, None, AICc, [pf]) - - qual1 = cluster.quality() - print qual1.stat - cluster.fit() - yfit = cluster.calc() - qual2 = cluster.quality() - print qual2.stat - - plt.figure(1) - plt.plot(r, y, r, yfit) - plt.show() diff --git a/diffpy/srmise/peaks/terminationripples.py b/diffpy/srmise/peaks/terminationripples.py deleted file mode 100644 index cdce773..0000000 --- a/diffpy/srmise/peaks/terminationripples.py +++ /dev/null @@ -1,308 +0,0 @@ -#!/usr/bin/env python -############################################################################## -# -# SrMise by Luke Granlund -# (c) 2014 trustees of the Michigan State University. -# All rights reserved. -# -# File coded by: Luke Granlund -# -# See LICENSE.txt for license information. -# -############################################################################## - -import logging - -import numpy as np -import scipy.fftpack as fp - -import diffpy.srmise.srmiselog -from diffpy.srmise.peaks.base import PeakFunction - -logger = logging.getLogger("diffpy.srmise") - -class TerminationRipples (PeakFunction): - """Methods for evaluation and parameter estimation of a peak function with termination ripples.""" - - def __init__(self, base, qmax, extension=4., supersample=5., Cache=None): - """Peak function which adds termination ripples to existing function. - - Unlike other peak functions, TerminationRipples can only be evaluated - over a uniform grid, or at a single value using an ad hoc uniform grid - defined by qmax, extension, and supersample. - - Parameters - base: Instance of PeakFunction subclass. - qmax: Cut-off frequency in reciprocal space. - extension: How many multiples of 2pi/qmax to extend calculations in - order to avoid edge effects. - supersample: Number intervals over 2pi/qmax when a natural interval - cannot be determined while extending calculations. 
- Cache: A class (not instance) which implements caching of PeakFunction - evaluations.""" - parameterdict = base.parameterdict - formats = base.parformats - default_formats = base.default_formats - self.base = base - self.qmax = qmax - self.extension = extension - self.supersample = supersample - metadict = {} - metadict["qmax"] = (qmax, repr) - metadict["extension"] = (extension, repr) - metadict["supersample"] = (supersample, repr) - PeakFunction.__init__(self, parameterdict, formats, default_formats, metadict, base, Cache) - return - - #### Methods required by PeakFunction #### - - # TODO: A smart way to convert from the basefunctions estimate to an - # appropriate one when ripples are considered. This may not be necessary, - # though. - def estimate_parameters(self, r, y): - """Estimate parameters for single peak from data provided. - - Uses estimation routine provided by base peak function. - - Parameters - r: (Numpy array) Data along r from which to estimate - y: (Numpy array) Data along y from which to estimate - - Returns Numpy array of parameters in the default internal format. - Raises SrMiseEstimationError if parameters cannot be estimated for any - reason.""" - return self.base.estimate_parameters(r, y) - - - # TODO: Can this be implemented sanely for termination ripples? - def scale_at(self, pars, x, scale): - """Change parameters so value(x)->scale*value(x) for the base function. - - Does not change position or height of peak's maxima. Raises - SrMiseScalingError if the parameters cannot be scaled. - - Parameters - pars: (Array) Parameters corresponding to a single peak - x: (float) Position of the border - scale: (float > 0) Size of scaling at x.""" - return self.base.scale_at(pars, x, scale) - - def _jacobianraw(self, pars, r, free): - """Return Jacobian of base function with termination ripples. - - Parameters - pars: Sequence of parameters for a single peak - r: sequence or scalar over which pars is evaluated - free: sequence of booleans which determines which derivatives are - needed. True for evaluation, False for no evaluation.""" - return self.base._jacobianraw(pars, r, free) - - def _transform_derivativesraw(self, pars, in_format, out_format): - """Return gradient matrix for the pars converted from in_format to out_format. - - Parameters - pars: Sequence of parameters - in_format: A format defined for base peak function - out_format: A format defined for base peak function""" - return self.base._transform_derivativesraw(pars, in_format, out_format) - - def _transform_parametersraw(self, pars, in_format, out_format): - """Convert parameter values from in_format to out_format. - - Parameters - pars: Sequence of parameters - in_format: A format defined for base peak function - out_format: A format defined for base peak function""" - return self.base._transform_parametersraw(pars, in_format, out_format) - - def _valueraw(self, pars, r): - """Return value of base peak function for the given parameters and r values. - - pars: Sequence of parameters for a single peak - r: sequence or scalar over which pars is evaluated""" - return self.base._valueraw(pars, r) - - #### Overridden PeakFunction functions #### - # jacobian() and value() are not normally overridden by PeakFunction - # subclasses, but are here to minimize the effect of edge-effects while - # introducing termination ripples. - - def jacobian(self, peak, r, rng=None): - """Calculate (rippled) jacobian, possibly restricted by range. 
- - peak: The Peak to be evaluated - r: sequence or scalar over which peak is evaluated - rng: Optional slice object restricts which r-values are evaluated. - The output has same length as r, but unevaluated objects have - a default value of 0. If caching is enabled these may be - previously calculated values instead.""" - if self is not peak._owner: - raise ValueError("Argument 'peak' must be evaluated by the " - "PeakFunction subclass instance with which " - "it is associated.") - - # normally r will be a sequence, but also allow single numeric values - try: - if len(r) > 1: - dr = (r[-1]-r[0])/(len(r)-1) - else: - # dr is ad hoc if r is a single point - dr = 2*np.pi/(self.supersample*self.qmax) - - if rng is None: - rng = slice(0, len(r)) - rpart = r[rng] - (ext_r, ext_slice) = self.extend_grid(rpart, dr) - jac = self._jacobianraw(peak.pars, ext_r, peak.free) - output = [None for j in jac] - for idx in range(len(output)): - if jac[idx] is not None: - jac[idx] = self.cut_freq(jac[idx], dr) - output[idx] = r * 0. - output[idx][rng] = jac[idx][ext_slice] - return output - except (TypeError): - # dr is ad hoc if r is a single point. - dr = 2*np.pi/(self.supersample*self.qmax) - (ext_r, ext_slice) = self.extend_grid(np.array([r]), dr) - jac = self._jacobianraw(peak.pars, ext_r, peak.free) - for idx in range(len(output)): - if jac[idx] is not None: - jac[idx] = self.cut_freq(jac[idx], dr)[ext_slice][0] - return jac - - - def value(self, peak, r, rng=None): - """Calculate (rippled) value of peak, possibly restricted by range. - - This function overrides its counterpart in PeakFunction in order - to minimize the impact of edge-effects from introducing termination - ripples into an existing peak function. - - peak: The Peak to be evaluated - r: sequence or scalar over which peak is evaluated - rng: Optional slice object restricts which r-values are evaluated. - The output has same length as r, but unevaluated objects have - a default value of 0. If caching is enabled these may be - previously calculated values instead. - """ - if self is not peak._owner: - raise ValueError("Argument 'peak' must be evaluated by the " - "PeakFunction subclass instance with which " - "it is associated.") - - # normally r will be a sequence, but also allow single numeric values - - dr_super = 2*np.pi/(self.supersample*self.qmax) - if np.isscalar(r): - # dr is ad hoc if r is a single point. - (ext_r, ext_slice) = self.extend_grid(np.array([r]), dr_super) - value = self._valueraw(peak.pars, ext_r) - value = self.cut_freq(value, dr_super) - return value[ext_slice][0] - else: - if rng is None: - rng = slice(0, len(r)) - - output = r * 0. - - # Make sure the actual dr used for finding termination ripples - # is at least as fine as dr_super, while still calculating the - # function at precisely the requested points. - # When the underlying function is sampled too coarsely it can - # miss critical high frequency components and return a very - # poor approximation to the continuous case. The actual fineness - # of sampling needed to avoid the worst of these discretization - # issues is difficult to determine without detailed knowledge - # of the underlying function. 
- dr = (r[-1]-r[0])/(len(r)-1) - segments = np.ceil(dr/dr_super) - dr_segmented = dr/segments - - rpart = r[rng] - if segments > 1: - rpart = np.arange(rpart[0], rpart[-1] + dr_segmented/2, dr_segmented) - - (ext_r, ext_slice) = self.extend_grid(rpart, dr_segmented) - value = self._valueraw(peak.pars, ext_r) - value = self.cut_freq(value, dr_segmented) - output[rng] = value[ext_slice][::segments] - - return output - - def getmodule(self): - return __name__ - - #### Other methods #### - - def cut_freq(self, sequence, delta): - """Remove high-frequency components from sequence. - - This is equivalent to the discrete convolution of a signal with a sinc - function sin(2*pi*r/qmax)/r. - - Parameters - sequence: (numpy array) The sequence to alter. - delta: The spacing between elements in sequence.""" - padlen = int(2**np.ceil(np.log2(len(sequence)))) - padseq = fp.fft(sequence, padlen) - dq = 2*np.pi/((padlen-1)*delta) - lowidx = int(np.ceil(self.qmax/dq)) - hiidx = padlen+1-lowidx - - # Remove hi-frequency components - padseq[lowidx:hiidx]=0 - - padseq = fp.ifft(padseq) - return np.real(padseq[0:len(sequence)]) - - def extend_grid(self, r, dr): - """Return (extended r, slice giving original range).""" - ext = self.extension*2*np.pi/self.qmax - left_ext = np.arange(r[0]-dr, max(0., r[0]-ext-dr), -dr)[::-1] - right_ext = np.arange(r[-1]+dr, r[-1]+ext+dr, dr) - ext_r = np.concatenate((left_ext, r, right_ext)) - ext_slice = slice(len(left_ext), len(ext_r)-len(right_ext)) - return (ext_r, ext_slice) - -#end of class TerminationRipples - -# simple test code -if __name__ == '__main__': - - import matplotlib.pyplot as plt - from numpy.random import randn - - from diffpy.srmise.modelcluster import ModelCluster - from diffpy.srmise.modelevaluator import AICc - from diffpy.srmise.peakfunctions.gaussianoverr import GaussianOverR - from diffpy.srmise.peakfunctions.peaks import Peaks - from diffpy.srmise.peakfunctions.terminationripples import TerminationRipples - - res = .01 - r = np.arange(2,4,res) - err = np.ones(len(r)) #default unknown errors - pf1 = GaussianOverR(.7) - pf2 = TerminationRipples(pf1, 20.) - evaluator = AICc() - - pars = [[3, .2, 10], [3.5, .2, 10]] - ideal_peaks = Peaks([pf1.createpeak(p, "pwa") for p in pars]) - ripple_peaks = Peaks([pf2.createpeak(p, "pwa") for p in pars]) - y_ideal = ideal_peaks.value(r) - y_ripple = ripple_peaks.value(r) + .1*randn(len(r)) - - guesspars = [[2.7, .15, 5], [3.7, .3, 5]] - guess_peaks = Peaks([pf2.createpeak(p, "pwa") for p in guesspars]) - cluster = ModelCluster(guess_peaks, r, y_ripple, err, None, AICc, [pf2]) - - qual1 = cluster.quality() - print qual1.stat - cluster.fit() - yfit = cluster.calc() - qual2 = cluster.quality() - print qual2.stat - - plt.figure(1) - plt.plot(r, y_ideal, r, y_ripple, r, yfit) - plt.show() diff --git a/diffpy/srmise/version.py b/diffpy/srmise/version.py deleted file mode 100644 index d0c6e1d..0000000 --- a/diffpy/srmise/version.py +++ /dev/null @@ -1,30 +0,0 @@ -#!/usr/bin/env python -############################################################################## -# -# SrMise by Luke Granlund -# (c) 2014 trustees of the Michigan State University. -# All rights reserved. -# -# File coded by: Luke Granlund -# -# See LICENSE.txt for license information. -# -############################################################################## - -"""Definition of __version__, __date__, __gitsha__. 
-""" - -from ConfigParser import SafeConfigParser -from pkg_resources import resource_stream - -# obtain version information from the version.cfg file -cp = SafeConfigParser() -cp.readfp(resource_stream(__name__, "version.cfg")) - -__version__ = cp.get("DEFAULT", "version") -__date__ = cp.get("DEFAULT", "date") -__gitsha__ = cp.get("DEFAULT", "commit") - -del cp - -# End of file diff --git a/doc/Makefile b/doc/Makefile new file mode 100644 index 0000000..798f52b --- /dev/null +++ b/doc/Makefile @@ -0,0 +1,194 @@ +# Makefile for Sphinx documentation +# + +# You can set these variables from the command line. +SPHINXOPTS = +SPHINXBUILD = sphinx-build +PAPER = +BUILDDIR = build +BASENAME = $(subst .,,$(subst $() $(),,diffpy.srmise)) + +# User-friendly check for sphinx-build +ifeq ($(shell which $(SPHINXBUILD) >/dev/null 2>&1; echo $$?), 1) +$(error The '$(SPHINXBUILD)' command was not found. Make sure you have Sphinx installed, then set the SPHINXBUILD environment variable to point to the full path of the '$(SPHINXBUILD)' executable. Alternatively you can add the directory with the executable to your PATH. If you don't have Sphinx installed, grab it from http://sphinx-doc.org/) +endif + +# Internal variables. +PAPEROPT_a4 = -D latex_paper_size=a4 +PAPEROPT_letter = -D latex_paper_size=letter +ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) source +# the i18n builder cannot share the environment and doctrees with the others +I18NSPHINXOPTS = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) source + +.PHONY: help clean html dirhtml singlehtml pickle json htmlhelp qthelp devhelp epub latex latexpdf text man changes linkcheck doctest gettext + +help: + @echo "Please use \`make ' where is one of" + @echo " html to make standalone HTML files" + @echo " dirhtml to make HTML files named index.html in directories" + @echo " singlehtml to make a single large HTML file" + @echo " pickle to make pickle files" + @echo " json to make JSON files" + @echo " htmlhelp to make HTML files and a HTML help project" + @echo " qthelp to make HTML files and a qthelp project" + @echo " devhelp to make HTML files and a Devhelp project" + @echo " epub to make an epub" + @echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter" + @echo " latexpdf to make LaTeX files and run them through pdflatex" + @echo " latexpdfja to make LaTeX files and run them through platex/dvipdfmx" + @echo " text to make text files" + @echo " man to make manual pages" + @echo " texinfo to make Texinfo files" + @echo " info to make Texinfo files and run them through makeinfo" + @echo " gettext to make PO message catalogs" + @echo " changes to make an overview of all changed/added/deprecated items" + @echo " xml to make Docutils-native XML files" + @echo " pseudoxml to make pseudoxml-XML files for display purposes" + @echo " linkcheck to check all external links for integrity" + @echo " doctest to run all doctests embedded in the documentation (if enabled)" + +clean: + rm -rf $(BUILDDIR)/* + +html: + $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html + @echo + @echo "Build finished. The HTML pages are in $(BUILDDIR)/html." + +dirhtml: + $(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml + @echo + @echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml." + +singlehtml: + $(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml + @echo + @echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml." 
+ +pickle: + $(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle + @echo + @echo "Build finished; now you can process the pickle files." + +json: + $(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json + @echo + @echo "Build finished; now you can process the JSON files." + +htmlhelp: + $(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp + @echo + @echo "Build finished; now you can run HTML Help Workshop with the" \ + ".hhp project file in $(BUILDDIR)/htmlhelp." + +qthelp: + $(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp + @echo + @echo "Build finished; now you can run "qcollectiongenerator" with the" \ + ".qhcp project file in $(BUILDDIR)/qthelp, like this:" + @echo "# qcollectiongenerator $(BUILDDIR)/qthelp/$(BASENAME).qhcp" + @echo "To view the help file:" + @echo "# assistant -collectionFile $(BUILDDIR)/qthelp/$(BASENAME).qhc" + +devhelp: + $(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp + @echo + @echo "Build finished." + @echo "To view the help file:" + @echo "# mkdir -p $$HOME/.local/share/devhelp/$(BASENAME)" + @echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/$(BASENAME)" + @echo "# devhelp" + +epub: + $(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub + @echo + @echo "Build finished. The epub file is in $(BUILDDIR)/epub." + +latex: + $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex + @echo + @echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex." + @echo "Run \`make' in that directory to run these through (pdf)latex" \ + "(use \`make latexpdf' here to do that automatically)." + +latexpdf: + $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex + @echo "Running LaTeX files through pdflatex..." + $(MAKE) -C $(BUILDDIR)/latex all-pdf + @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." + +latexpdfja: + $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex + @echo "Running LaTeX files through platex and dvipdfmx..." + $(MAKE) -C $(BUILDDIR)/latex all-pdf-ja + @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." + +text: + $(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text + @echo + @echo "Build finished. The text files are in $(BUILDDIR)/text." + +man: + $(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man + @echo + @echo "Build finished. The manual pages are in $(BUILDDIR)/man." + +texinfo: + $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo + @echo + @echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo." + @echo "Run \`make' in that directory to run these through makeinfo" \ + "(use \`make info' here to do that automatically)." + +info: + $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo + @echo "Running Texinfo files through makeinfo..." + make -C $(BUILDDIR)/texinfo info + @echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo." + +gettext: + $(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale + @echo + @echo "Build finished. The message catalogs are in $(BUILDDIR)/locale." + +changes: + $(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes + @echo + @echo "The overview file is in $(BUILDDIR)/changes." + +linkcheck: + $(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck + @echo + @echo "Link check complete; look for any errors in the above output " \ + "or in $(BUILDDIR)/linkcheck/output.txt." 
+ +doctest: + $(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest + @echo "Testing of doctests in the sources finished, look at the " \ + "results in $(BUILDDIR)/doctest/output.txt." + +# Manual publishing to the gh-pages branch + +GITREPOPATH = $(shell cd $(CURDIR) && git rev-parse --git-dir) +GITREMOTE = origin +GITREMOTEURL = $(shell git config --get remote.$(GITREMOTE).url) +GITLASTCOMMIT = $(shell git rev-parse --short HEAD) + +publish: + @test -d build/html || \ + ( echo >&2 "Run 'make html' first!"; false ) + git show-ref --verify --quiet refs/heads/gh-pages || \ + git branch --track gh-pages $(GITREMOTE)/gh-pages + test -d build/gh-pages || \ + git clone -s -b gh-pages $(GITREPOPATH) build/gh-pages + cd build/gh-pages && \ + git pull $(GITREMOTEURL) gh-pages + rsync -acv --delete --exclude=.git --exclude=.rsync-exclude \ + --exclude-from=build/gh-pages/.rsync-exclude \ + --link-dest=$(CURDIR)/build/html build/html/ build/gh-pages/ + cd build/gh-pages && \ + git add --all . && \ + git diff --cached --quiet || \ + git commit -m "Sync with the source at $(GITLASTCOMMIT)." + cd build/gh-pages && \ + git push origin gh-pages diff --git a/doc/examples/README b/doc/examples/README.rst similarity index 100% rename from doc/examples/README rename to doc/examples/README.rst diff --git a/doc/examples/extract_single_peak.py b/doc/examples/extract_single_peak.py index 99edd4c..5004172 100644 --- a/doc/examples/extract_single_peak.py +++ b/doc/examples/extract_single_peak.py @@ -27,26 +27,26 @@ import matplotlib.pyplot as plt -from diffpy.srmise import PDFPeakExtraction from diffpy.srmise.applications.plot import makeplot -from diffpy.srmise.baselines import Polynomial +from diffpy.srmise.baselines.polynomial import Polynomial +from diffpy.srmise.pdfpeakextraction import PDFPeakExtraction def run(plot=True): - ## Initialize peak extraction + # Initialize peak extraction # Create peak extraction object ppe = PDFPeakExtraction() # Load the PDF from a file ppe.loadpdf("data/Ag_nyquist_qmax30.gr") - ## Set up extraction parameters. - # For convenience we add all parameters to a dictionary before passing them + # Set up extraction parameters. + # For convenience, we add all parameters to a dictionary before passing them # to the extraction object. # # The "rng" (range) parameter defines the region over which peaks will be - # extracted and fit. For the well isolated nearest-neighbor silver peak, + # extracted and fit. For the well isolated nearest-neighbor silver peak, # which occurs near 2.9 angstroms, it is sufficient to perform extraction # between 2 and 3.5 angstroms. # @@ -61,24 +61,24 @@ def run(plot=True): # Apply peak extraction parameters. ppe.setvars(**kwds) - ## Perform peak extraction + # Perform peak extraction ppe.extract() - ## Save output + # Save output # The write() method saves a file which preserves all aspects of peak # extraction and its results, by convention using the .srmise extension, # and which can later be read by diffpy.srmise. # # The writepwa() method saves a file intended as a human-readable summary. # In particular, it reports the position, width (as full-width at - # half-maximum), and area of of extracted peaks. The reported values + # half-maximum), and area of extracted peaks. The reported values # are for Gaussians in the radial distribution function (RDF) corresponding # to this PDF. ppe.write("output/extract_single_peak.srmise") ppe.writepwa("output/extract_single_peak.pwa") - ## Plot results. - # Display plot of extracted peak. 
It is also possible to plot an existing + # Plot results. + # Display plot of extracted peak. It is also possible to plot an existing # .srmise file from the command line using # srmise output/Ag_singlepeak.srmise --no-extract --plot # For additional plotting options, run "srmiseplot --help". diff --git a/doc/examples/fit_initial.py b/doc/examples/fit_initial.py index 33bf4e6..fa9961c 100644 --- a/doc/examples/fit_initial.py +++ b/doc/examples/fit_initial.py @@ -20,21 +20,21 @@ grid.""" import matplotlib.pyplot as plt -import numpy as np -from diffpy.srmise import PDFPeakExtraction from diffpy.srmise.applications.plot import makeplot -from diffpy.srmise.baselines import FromSequence -from diffpy.srmise.peaks import Peaks, TerminationRipples +from diffpy.srmise.baselines.fromsequence import FromSequence +from diffpy.srmise.pdfpeakextraction import PDFPeakExtraction +from diffpy.srmise.peaks.base import Peaks +from diffpy.srmise.peaks.terminationripples import TerminationRipples def run(plot=True): - ## Initialize peak extraction + # Initialize peak extraction ppe = PDFPeakExtraction() ppe.loadpdf("data/C60_fine_qmax21.gr") - ## Set up interpolated baseline. + # Set up interpolated baseline. # The FromSequence baseline creates an interpolated baseline from provided # r and G(r) values, either two lists or a file containing (r, G(r)) pairs. # The baseline has no parameters. This particular baseline was estimated @@ -43,7 +43,7 @@ def run(plot=True): blf = FromSequence("data/C60baseline.dat") bl = blf.actualize([]) - ## Set up fitting parameters + # Set up fitting parameters # A summary of how parameters impact fitting is given below. # "rng" - Same as peak extraction # "baseline" - Same as peak extraction @@ -66,7 +66,7 @@ def run(plot=True): kwds["dg"] = 5000 # ad hoc, but gives each point equal weight in fit. ppe.setvars(**kwds) - ## Set up termination ripples + # Set up termination ripples # Peak fitting never changes the peak function, so termination ripples # are not applied automatically as they are in peak extraction. # Termination ripples require setting the underlying peak function and qmax. @@ -95,7 +95,7 @@ def run(plot=True): # Perform fit. ppe.fit() - ## Save results + # Save results ppe.write("output/fit_initial.srmise") ppe.writepwa("output/fit_initial.pwa") diff --git a/doc/examples/multimodel_known_dG1.py b/doc/examples/multimodel_known_dG1.py index aa50c41..f1fe508 100644 --- a/doc/examples/multimodel_known_dG1.py +++ b/doc/examples/multimodel_known_dG1.py @@ -37,47 +37,47 @@ import numpy as np import diffpy.srmise.srmiselog as sml -from diffpy.srmise import MultimodelSelection, PDFPeakExtraction -from diffpy.srmise.applications.plot import makeplot +from diffpy.srmise.multimodelselection import MultimodelSelection +from diffpy.srmise.pdfpeakextraction import PDFPeakExtraction def run(plot=True): - ## Suppress mundane output + # Suppress mundane output # When running scripts, especially involving multiple trials, it can be # useful to suppress many of the diffpy.srmise messages. Valid levels # include "debug", "info" (the default), "warning", "error", and # "critical." See diffpy.srmise.srmiselog for more information. sml.setlevel("warning") - ## Initialize peak extraction from saved trial + # Initialize peak extraction from saved trial ppe = PDFPeakExtraction() ppe.read("output/query_results.srmise") ppe.clearcalc() - ## Set up extraction parameters + # Set up extraction parameters # All parameters loaded from .srmise file. 
# Setting new values will override the previous values. kwds = {} kwds["rng"] = [10.9, 15] # Region of PDF with some overlap. ppe.setvars(**kwds) - ## Create multimodel selection object. + # Create multimodel selection object. # The MultimodelSelection class keeps track of the results of peak # extraction as the assumed uncertainty dg is varied. ms = MultimodelSelection() ms.setppe(ppe) - ## Define range of dg values + # Define range of dg values # For the purpose of illustration use 15 evenly-spaced values of dg where # 50% < dg < 120% of mean experimental dG in extraction range. dg_mean = np.mean(ppe.dy[ppe.getrangeslice()]) dgs = np.linspace(0.5 * dg_mean, 1.2 * dg_mean, 15) - ## Perform peak extraction for each of the assumed uncertainties. + # Perform peak extraction for each of the assumed uncertainties. ms.run(dgs) - ## Save results + # Save results # The file known_dG_models.dat saves the models generated above. The file # known_dG_aics.dat saves the value of the AIC of each model when evaluated # on a Nyquist-sampled grid using each of the dg values used to generate diff --git a/doc/examples/multimodel_known_dG2.py b/doc/examples/multimodel_known_dG2.py index d061981..6e6fdb3 100644 --- a/doc/examples/multimodel_known_dG2.py +++ b/doc/examples/multimodel_known_dG2.py @@ -35,31 +35,46 @@ import numpy as np import diffpy.srmise.srmiselog as sml -from diffpy.srmise import MultimodelSelection from diffpy.srmise.applications.plot import makeplot +from diffpy.srmise.multimodelselection import MultimodelSelection # distances from ideal Ag (refined to PDF) -dcif = np.array([11.2394, 11.608, 11.9652, 12.3121, 12.6495, 12.9781, 13.2986, - 13.6116, 13.9175, 14.2168, 14.51, 14.7973]) +dcif = np.array( + [ + 11.2394, + 11.608, + 11.9652, + 12.3121, + 12.6495, + 12.9781, + 13.2986, + 13.6116, + 13.9175, + 14.2168, + 14.51, + 14.7973, + ] +) + def run(plot=True): # Suppress mundane output sml.setlevel("warning") - ## Create multimodeling object and load diffpy.srmise results from file. + # Create multimodeling object and load diffpy.srmise results from file. ms = MultimodelSelection() ms.load("output/known_dG_models.dat") ms.loadaics("output/known_dG_aics.dat") - ## Use Nyquist sampling + # Use Nyquist sampling # Standard AIC analysis assumes the data have independent uncertainties. # Nyquist sampling minimizes correlations in the PDF, which is the closest # approximation to independence possible for the PDF. - dr = np.pi/ms.ppe.qmax - (r,y,dr2,dy) = ms.ppe.resampledata(dr) + dr = np.pi / ms.ppe.qmax + (r, y, dr2, dy) = ms.ppe.resampledata(dr) - ## Classify models + # Classify models # All models are placed into classes. Models in the same class # should be essentially identical (same peak parameters, etc.) # up to a small tolerance determined by comparing individual peaks. The @@ -75,16 +90,16 @@ def run(plot=True): tolerance = 0.2 ms.classify(r, tolerance) - ## Summarize various facts about the analysis. + # Summarize various facts about the analysis. 
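The Nyquist grid invoked in these examples follows directly from qmax: the spacing dr = pi/qmax is the coarsest sampling that still preserves all information in a band-limited PDF. A minimal standalone sketch (the qmax value and r-range are hypothetical, chosen only for illustration):

    import numpy as np

    qmax = 21.0  # hypothetical momentum-transfer cutoff, in inverse angstroms
    dr = np.pi / qmax  # Nyquist spacing in angstroms
    r_nyquist = np.arange(2.0, 10.0, dr)  # Nyquist-sampled grid over an example range
    print("dr = %f angstroms, %i points" % (dr, len(r_nyquist)))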
 num_models = len(ms.results)
     num_classes = len(ms.classes)
-    print "------- Multimodeling Summary --------"
-    print "Models: %i" %num_models
-    print "Classes: %i (tol=%s)" %(num_classes, tolerance)
-    print "Range of dgs: %f-%f" %(ms.dgs[0], ms.dgs[-1])
-    print "Nyquist-sampled data points: %i" %len(r)
+    print("------- Multimodeling Summary --------")
+    print("Models: %i" % num_models)
+    print("Classes: %i (tol=%s)" % (num_classes, tolerance))
+    print("Range of dgs: %f-%f" % (ms.dgs[0], ms.dgs[-1]))
+    print("Nyquist-sampled data points: %i" % len(r))

-    ## Get dG usable as key in analysis.
+    # Get dG usable as key in analysis.
     # The Akaike probabilities were calculated for many assumed values of the
     # experimental uncertainty dG, and each of these assumed dG is used as a
     # key when obtaining the corresponding results. Numerical precision can
@@ -92,7 +107,7 @@
     # the key closest to its argument.
     dG = ms.dg_key(np.mean(ms.ppe.dy))

-    ## Find "best" models.
+    # Find "best" models.
     # In short, models with greatest Akaike probability. Akaike probabilities
     # can only be validly compared if they were calculated for identical data,
     # namely identical PDF values *and* uncertainties, and are only reliable
@@ -101,11 +116,11 @@
     #
     # The present PDF satisfies these conditions, so the rankings below reflect
     # an AIC-based estimate of which of the tested models the data best support.
-    print "\n--------- Model Rankings for dG = %f ---------" %dG
-    print "Rank Model Class Free AIC Prob File"
+    print("\n--------- Model Rankings for dG = %f ---------" % dG)
+    print("Rank Model Class Free AIC Prob File")
     for i in range(len(ms.classes)):

-        ## Generate information about best model in ith best class.
+        # Generate information about best model in ith best class.
         # The get(dG, *args, **kwds) method returns a tuple of values
         # corresponding to string arguments for the best model in best class at
         # given dG. When the corder keyword is given it returns the model from
@@ -117,23 +132,25 @@
         # "prob" -> The AIC probability given uncertainty dG
         # These all have dedicated getter functions. For example, the model
         # index can also be obtained using get_model(dG, corder=i)
-        (model, cls, nfree, aic, prob) = \
-            ms.get(dG, "model", "class", "nfree", "aic", "prob", corder=i)
+        (model, cls, nfree, aic, prob) = ms.get(dG, "model", "class", "nfree", "aic", "prob", corder=i)

-        filename_base = "output/known_dG_m"+str(model)
+        filename_base = "output/known_dG_m" + str(model)

-        # Print info for this model
-        print "%4i %5i %5i %4i %10.4e %6.3f %s" \
-            %(i+1, model, cls, nfree, aic, prob, filename_base + ".pwa")
+        # Print info for this model
+        print(
+            "%4i %5i %5i %4i %10.4e %6.3f %s" % (i + 1, model, cls, nfree, aic, prob, filename_base + ".pwa")
+        )

         # A message added as a comment to saved .pwa file.
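The "Prob" column in the ranking above is, in the standard formulation, the Akaike weight: each model's relative likelihood exp(-(AIC - AIC_min)/2), normalized so the weights sum to one over the candidate set. A self-contained sketch with made-up AIC values (this is the textbook formula, not necessarily the exact code path inside diffpy.srmise):

    import numpy as np

    aics = np.array([1012.7, 1015.2, 1021.9])  # hypothetical AIC values for three models
    rel_likelihood = np.exp(-(aics - aics.min()) / 2.0)
    akaike_probs = rel_likelihood / rel_likelihood.sum()  # normalized; sums to 1
    for rank, idx in enumerate(np.argsort(aics)):
        print("rank %i: AIC=%.1f prob=%.3f" % (rank + 1, aics[idx], akaike_probs[idx]))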
-        msg = ["Multimodeling Summary",
-               "---------------------",
-               "Evaluated at dG: %s" %dG,
-               "Model: %i (of %i)" %(model, num_models),
-               "Class: %i (of %i, tol=%s)" %(cls, num_classes, tolerance),
-               "Akaike probability: %g" %prob,
-               "Rank: %i" %(i+1),]
+        msg = [
+            "Multimodeling Summary",
+            "---------------------",
+            "Evaluated at dG: %s" % dG,
+            "Model: %i (of %i)" % (model, num_models),
+            "Class: %i (of %i, tol=%s)" % (cls, num_classes, tolerance),
+            "Akaike probability: %g" % prob,
+            "Rank: %i" % (i + 1),
+        ]
         msg = "\n".join(msg)

         # Make this the active model
@@ -146,13 +163,11 @@
     if plot:
         plt.figure()
         makeplot(ms.ppe, dcif)
-        plt.title("Model %i/Class %i (Rank %i, AIC prob=%f)" \
-            %(model, cls, i+1, prob))
+        plt.title("Model %i/Class %i (Rank %i, AIC prob=%f)" % (model, cls, i + 1, prob))

         # Uncomment line below to save figures.
         # plt.savefig(filename_base + ".png", format="png")
-
-    ## 3D plot of Akaike probabilities
+    # 3D plot of Akaike probabilities
     # This plot shows the Akaike probabilities of all classes as a function
     # of assumed uncertainty dG. This gives a rough sense of how the models
     # selected by an AIC-based analysis would vary if the experimental
@@ -161,13 +176,14 @@
     # are highlighted.
     if plot:
         plt.figure()
-        ms.plot3dclassprobs(probfilter=[0.0, 1.], highlight=[dG])
+        ms.plot3dclassprobs(probfilter=[0.0, 1.0], highlight=[dG])
         plt.tight_layout()

         # Uncomment line below to save figure.
-        #plt.savefig("output/known_dG_probs.png", format="png", bbox_inches="tight")
+        # plt.savefig("output/known_dG_probs.png", format="png", bbox_inches="tight")

     if plot:
         plt.show()

-if __name__ == '__main__':
+
+if __name__ == "__main__":
     run()
diff --git a/doc/examples/multimodel_unknown_dG1.py b/doc/examples/multimodel_unknown_dG1.py
index 3016fb3..4570f78 100644
--- a/doc/examples/multimodel_unknown_dG1.py
+++ b/doc/examples/multimodel_unknown_dG1.py
@@ -36,24 +36,25 @@
 import numpy as np

 import diffpy.srmise.srmiselog as sml
-from diffpy.srmise import MultimodelSelection, PDFPeakExtraction
-from diffpy.srmise.baselines import FromSequence
+from diffpy.srmise.baselines.fromsequence import FromSequence
+from diffpy.srmise.multimodelselection import MultimodelSelection
+from diffpy.srmise.pdfpeakextraction import PDFPeakExtraction


 def run(plot=True):
-    ## Suppress mundane output
+    # Suppress mundane output
     # When running scripts, especially involving multiple trials, it can be
     # useful to suppress many of the diffpy.srmise messages. Valid levels
     # include "debug", "info" (the default), "warning", "error", and
     # "critical." See diffpy.srmise.srmiselog for more information.
     sml.setlevel("warning")

-    ## Initialize peak extraction
+    # Initialize peak extraction
     ppe = PDFPeakExtraction()
     ppe.loadpdf("data/C60_fine_qmax21.gr")

-    ## Set up extraction parameters
+    # Set up extraction parameters
    # The FromSequence baseline interpolates (r, G(r)) values read from a
    # specified file. It has no parameters. This particular baseline was
    # calculated by approximating the C60 sample as a face-centered cubic
@@ -65,22 +66,22 @@
     kwds["cres"] = 0.05
     ppe.setvars(**kwds)

-    ## Create multimodel selection object.
+    # Create multimodel selection object.
     # The MultimodelSelection class keeps track of the results of peak
     # extraction as the assumed uncertainty dg is varied.
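Before the multimodel runs below, note that the dg sweep itself is nothing more than an evenly spaced set of trial uncertainties bracketing a reference value, as in this sketch (the reference uncertainty is hypothetical):

    import numpy as np

    dg_mean = 0.02  # hypothetical mean experimental uncertainty dG
    dgs = np.linspace(0.5 * dg_mean, 1.2 * dg_mean, 15)  # 50% to 120% of the mean
    print(dgs)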
ms = MultimodelSelection() ms.setppe(ppe) - ## Define range of dg values + # Define range of dg values # For the purpose of illustration use 20 evenly-spaced values of dg where # 1% < dg < 10% of max gr value between r=1 and 7.25. grmax = np.max(ppe.y[ppe.getrangeslice()]) dgs = np.linspace(0.01 * grmax, 0.10 * grmax, 20) - ## Perform peak extraction for each of the assumed uncertainties. + # Perform peak extraction for each of the assumed uncertainties. ms.run(dgs) - ## Save results + # Save results # The file C60_models.dat saves the models generated above. The file # C60_aics.dat saves the value of the AIC of each model when evaluated # on a Nyquist-sampled grid using each of the dg values used to generate diff --git a/doc/examples/multimodel_unknown_dG2.py b/doc/examples/multimodel_unknown_dG2.py index 1bc9f60..c4bbef4 100644 --- a/doc/examples/multimodel_unknown_dG2.py +++ b/doc/examples/multimodel_unknown_dG2.py @@ -42,34 +42,55 @@ import numpy as np import diffpy.srmise.srmiselog as sml -from diffpy.srmise import MultimodelSelection from diffpy.srmise.applications.plot import makeplot +from diffpy.srmise.multimodelselection import MultimodelSelection # distances from ideal (unrefined) C60 -dcif = np.array([1.44, 2.329968944, 2.494153163, 2.88, 3.595985339, - 3.704477734, 4.132591264, 4.520339129, 4.659937888, - 4.877358006, 5.209968944, 5.405310018, 5.522583786, - 5.818426502, 6.099937888, 6.164518388, 6.529777754, - 6.686673127, 6.745638756, 6.989906831, 7.136693738]) +dcif = np.array( + [ + 1.44, + 2.329968944, + 2.494153163, + 2.88, + 3.595985339, + 3.704477734, + 4.132591264, + 4.520339129, + 4.659937888, + 4.877358006, + 5.209968944, + 5.405310018, + 5.522583786, + 5.818426502, + 6.099937888, + 6.164518388, + 6.529777754, + 6.686673127, + 6.745638756, + 6.989906831, + 7.136693738, + ] +) + def run(plot=True): # Suppress mundane output sml.setlevel("warning") - ## Create multimodeling object and load diffpy.srmise results from file. + # Create multimodeling object and load diffpy.srmise results from file. ms = MultimodelSelection() ms.load("output/unknown_dG_models.dat") ms.loadaics("output/unknown_dG_aics.dat") - ## Use Nyquist sampling + # Use Nyquist sampling # Standard AIC analysis assumes the data have independent uncertainties. # Nyquist sampling minimizes correlations in the PDF, which is the closest # approximation to independence possible for the PDF. - dr = np.pi/ms.ppe.qmax - (r,y,dr2,dy) = ms.ppe.resampledata(dr) + dr = np.pi / ms.ppe.qmax + (r, y, dr2, dy) = ms.ppe.resampledata(dr) - ## Classify models + # Classify models # All models are placed into classes. Models in the same class # should be essentially identical (same peak parameters, etc.) # up to a small tolerance determined by comparing individual peaks. The @@ -85,16 +106,16 @@ def run(plot=True): tolerance = 0.2 ms.classify(r, tolerance) - ## Summarize various facts about the analysis. + # Summarize various facts about the analysis. num_models = len(ms.results) num_classes = len(ms.classes) - print "------- Multimodeling Summary --------" - print "Models: %i" %num_models - print "Classes: %i (tol=%s)" %(num_classes, tolerance) - print "Range of dgs: %f-%f" %(ms.dgs[0], ms.dgs[-1]) - print "Nyquist-sampled data points: %i" %len(r) + print("------- Multimodeling Summary --------") + print("Models: %i" % num_models) + print("Classes: %i (tol=%s)" % (num_classes, tolerance)) + print("Range of dgs: %f-%f" % (ms.dgs[0], ms.dgs[-1])) + print("Nyquist-sampled data points: %i" % len(r)) - ## Find "best" models. 
+    # Find "best" models.
     # In short, models with greatest Akaike probability. Akaike probabilities
     # can only be validly compared if they were calculated for identical data,
     # namely identical PDF values *and* uncertainties, and are only reliable
@@ -115,16 +136,15 @@
     best_classes = np.unique([ms.get_class(dG) for dG in ms.dgs])
     best_dGs = []
     for cls in best_classes:
-        cls_probs = [ms.get_prob(dG) if ms.get_class(dG) == cls else 0 \
-            for dG in ms.dgs]
+        cls_probs = [ms.get_prob(dG) if ms.get_class(dG) == cls else 0 for dG in ms.dgs]
         dG = ms.dgs[np.argmax(cls_probs)]
         best_dGs.append(dG)

-    print "\n--------- Best models for at least one dG ---------" %dG
-    print " Best dG Model Class Free AIC Prob File"
+    print("\n--------- Best models for at least one dG ---------")
+    print(" Best dG Model Class Free AIC Prob File")
     for dG in best_dGs:

-        ## Generate information about best model.
+        # Generate information about best model.
         # The get(dG, *args, **kwds) method returns a tuple of values
         # corresponding to string arguments for the best model in best class at
         # given dG. When the corder keyword is given it returns the model from
@@ -135,24 +155,26 @@
         # "aic" -> The AIC for this model given uncertainty dG
         # "prob" -> The AIC probability given uncertainty dG
         # These all have dedicated getter functions.
-        (model, cls, nfree, aic, prob) = \
-            ms.get(dG, "model", "class", "nfree", "aic", "prob")
+        (model, cls, nfree, aic, prob) = ms.get(dG, "model", "class", "nfree", "aic", "prob")

-        filename_base = "output/unknown_dG_m"+str(model)
+        filename_base = "output/unknown_dG_m" + str(model)

-        # Print info for this model
-        print "%10.4e %5i %5i %4i %10.4e %6.3f %s" \
-            %(dG, model, cls, nfree, aic, prob, filename_base + ".pwa")
+        # Print info for this model
+        print(
+            "%10.4e %5i %5i %4i %10.4e %6.3f %s" % (dG, model, cls, nfree, aic, prob, filename_base + ".pwa")
+        )

         # A message added as a comment to saved .pwa file.
         best_from = [dg for dg in ms.dgs if ms.get_class(dg) == cls]
-        msg = ["Multimodeling Summary",
-               "---------------------",
-               "Model: %i (of %i)" %(model, num_models),
-               "Class: %i (of %i, tol=%s)" %(cls, num_classes, tolerance),
-               "Best model from dG: %s-%s" %(best_from[0], best_from[-1]),
-               "Evaluated at dG: %s" %dG,
-               "Akaike probability: %g" %prob]
+        msg = [
+            "Multimodeling Summary",
+            "---------------------",
+            "Model: %i (of %i)" % (model, num_models),
+            "Class: %i (of %i, tol=%s)" % (cls, num_classes, tolerance),
+            "Best model from dG: %s-%s" % (best_from[0], best_from[-1]),
+            "Evaluated at dG: %s" % dG,
+            "Akaike probability: %g" % prob,
+        ]
         msg = "\n".join(msg)

         # Make this the active model
@@ -165,13 +187,11 @@
     if plot:
         plt.figure()
         makeplot(ms.ppe, dcif)
-        plt.title("Model %i/Class %i (Best dG=%f, AIC prob=%f)" \
-            %(model, cls, dG, prob))
+        plt.title("Model %i/Class %i (Best dG=%f, AIC prob=%f)" % (model, cls, dG, prob))

         # Uncomment line below to save figures.
         # plt.savefig(filename_base + ".png", format="png")
-
-    ## 3D plot of Akaike probabilities
+    # 3D plot of Akaike probabilities
     # This plot shows the Akaike probabilities of all classes as a function
     # of assumed uncertainty dG. This gives a rough sense of how the models
     # selected by an AIC-based analysis would vary if the experimental
@@ -179,13 +199,14 @@
     # are highlighted at the various dG values found above.
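The selection logic above (keep every class that wins for at least one dg, then report each winner at the dg where its probability peaks) can be exercised on mock data without running any extraction. Here the class assignments and probabilities are invented stand-ins for ms.get_class(dg) and ms.get_prob(dg):

    import numpy as np

    dgs = [0.01, 0.02, 0.03, 0.04]  # hypothetical assumed uncertainties
    best_class = {0.01: 2, 0.02: 2, 0.03: 5, 0.04: 5}  # winning class at each dg
    prob = {0.01: 0.61, 0.02: 0.74, 0.03: 0.55, 0.04: 0.48}  # its probability there

    best_dgs = []
    for cls in sorted(set(best_class.values())):
        # Zero out probabilities wherever this class is not the winner.
        cls_probs = [prob[dg] if best_class[dg] == cls else 0 for dg in dgs]
        best_dgs.append(dgs[int(np.argmax(cls_probs))])
    print(best_dgs)  # -> [0.02, 0.03], where classes 2 and 5 peak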
if plot: plt.figure() - ms.plot3dclassprobs(probfilter=[0.1, 1.], highlight=best_dGs) + ms.plot3dclassprobs(probfilter=[0.1, 1.0], highlight=best_dGs) plt.tight_layout() # Uncomment line below to save figure. - #plt.savefig("output/unknown_dG_probs.png", format="png", bbox_inches="tight") + # plt.savefig("output/unknown_dG_probs.png", format="png", bbox_inches="tight") if plot: plt.show() -if __name__ == '__main__': + +if __name__ == "__main__": run() diff --git a/doc/examples/parameter_summary.py b/doc/examples/parameter_summary.py index 2ba1ac1..1d4095c 100644 --- a/doc/examples/parameter_summary.py +++ b/doc/examples/parameter_summary.py @@ -29,33 +29,33 @@ import matplotlib.pyplot as plt -from diffpy.srmise import PDFPeakExtraction from diffpy.srmise.applications.plot import makeplot -from diffpy.srmise.baselines import Polynomial -from diffpy.srmise.peaks import GaussianOverR +from diffpy.srmise.baselines.polynomial import Polynomial +from diffpy.srmise.pdfpeakextraction import PDFPeakExtraction +from diffpy.srmise.peaks.gaussianoverr import GaussianOverR def run(plot=True): - ## Initialize peak extraction + # Initialize peak extraction # Create peak extraction object ppe = PDFPeakExtraction() # Load the PDF from a file ppe.loadpdf("data/TiO2_fine_qmax26.gr") - ###### Set up extraction parameters. + # Set up extraction parameters. # In this section we'll examine the major extraction parameters in detail. # diffpy.srmise strives to provide reasonable default values for these # parameters. For normal use setting the range, baseline, and uncertainty # should be sufficient. kwds = {} - ## Range + # Range # Range defaults to the entire PDF if not specified. kwds["rng"] = [1.5, 10.0] - ## dg + # dg # diffpy.srmise selects model complexity based primarily on the uncertainty # of the PDF. Note that very small uncertainties (<1%) can make peak # extraction excessively slow. In general, the smaller the uncertainty the @@ -80,7 +80,7 @@ def run(plot=True): # 1273-1283. doi:10.1107/S1600576714010516 kwds["dg"] = 0.35 # Play with this value! - ## baseline + # baseline # As a crystal PDF, a linear baseline crossing the origin is appropriate. # Here we define the linear baseline B(r) = -.5*r + 0, and explicitly set # the y-intercept as a fixed parameter which will not be fit. For @@ -91,7 +91,7 @@ def run(plot=True): slope = -0.65 # Play with this value! y_intercept = 0.0 kwds["baseline"] = blfunc.actualize([slope, y_intercept], free=[True, False]) - ## pf + # pf # The pf (peakfunction) parameter allows setting the shape of peaks to be # extracted. Termination effects are added automatically to the peak # function during extraction. In the harmonic approximation of atomic @@ -109,7 +109,7 @@ def run(plot=True): pf = GaussianOverR(0.7) kwds["pf"] = [pf] # Despite the list, only one entry is currently supported. - ## qmax + # qmax # PDFs typically report the value of qmax (i.e. the maximum momentum # transfer q in the measurement), but it can be specified explicitly also. # If the PDF does not report qmax, diffpy.srmise attempts to estimate it @@ -119,7 +119,7 @@ def run(plot=True): # diffpy.srmise does not consider Nyquist sampling or termination effects. kwds["qmax"] = 26.0 - ## nyquist + # nyquist # This parameter governs whether diffpy.srmise attempts to find a model # on a Nyquist-sampled grid with dr=pi/qmax, which is a grid where data # uncertainties are least correlated without loss of information. 
 By
@@ -132,7 +132,7 @@ def run(plot=True):
     # doi:10.1103/PhysRevB.84.134105
     kwds["nyquist"] = True

-    ## supersample
+    # supersample
     # This parameter dictates the data be oversampled by at least this factor
     # (relative to the Nyquist rate) during the early stages of peak
     # extraction. If the input PDF is even more finely sampled, that level of
@@ -141,7 +141,7 @@
     # finding and clustering process, but reduces speed.
     kwds["supersample"] = 4.0

-    ## cres
+    # cres
     # The cres (clustering resolution) parameter governs the sensitivity of the
     # clustering method used by diffpy.srmise. In short, when the data are
     # being clustered, data which are further than the clustering resolution
@@ -156,7 +156,7 @@
     # Apply peak extraction parameters.
     ppe.setvars(**kwds)

-    ## initial_peaks
+    # initial_peaks
     # Initial peaks are peaks which are kept fixed during the early stages of
     # peak extraction, effectively conditioning results upon their values. Since
     # initial peaks are sometimes dependent on other SrMise parameters (e.g.
@@ -168,7 +168,7 @@
     # diffpy.srmise estimate the peak parameters.
     # 2) Explicit specification of peak parameters.

-    ## Initial peaks from approximate positions.
+    # Initial peaks from approximate positions.
     # This routine estimates peak parameters by finding the peak-like cluster
     # containing the specified point. It does not search for occluded peaks,
     # so works best on well-separated peaks. It does, however, take any
@@ -177,7 +177,7 @@
     for p in positions:
         ppe.estimate_peak(p)  # adds to initial_peaks

-    ## Initial peaks from explicit parameters.
+    # Initial peaks from explicit parameters.
     # Adding initial peaks explicitly is similar to defining a baseline.
     # Namely, choosing a peak function and then actualizing it with given
     # parameters. For this example peaks are created from the same GaussianOverR
@@ -194,22 +194,22 @@ def run(plot=True):
         peaks.append(pf.actualize(p, free=[True, False, True], in_format="pwa"))
     ppe.add_peaks(peaks)  # adds to initial_peaks

-    ## Initial peaks and pruning
+    # Initial peaks and pruning
     # While initial peaks condition what other peaks can be extracted, by
     # default they can also be pruned if a simpler model appears better. To
     # prevent this, they can be set as non-removable.
     for ip in ppe.initial_peaks:
         ip.removable = False

-    ## Plot initial parameters
+    # Plot initial parameters
     if plot:
         makeplot(ppe)
         plt.title("Initial Peaks")

-    ###### Perform peak extraction
+    # Perform peak extraction
     ppe.extract()

-    ## Save output
+    # Save output
     # The write() method saves a file which preserves all aspects of peak
     # extraction and its results, by convention using the .srmise extension,
     # and which can later be read by diffpy.srmise.
@@ -222,7 +222,7 @@ def run(plot=True):
     ppe.write("output/parameter_summary.srmise")
     ppe.writepwa("output/parameter_summary.pwa")

-    ## Plot results.
+    # Plot results.
     # Display plot of extracted peak. It is also possible to plot an existing
     # .srmise file from the command line using
     #     srmise output/TiO2_parameterdetail.srmise --no-extract --plot
diff --git a/doc/examples/query_results.py b/doc/examples/query_results.py
index c861ee5..c012567 100644
--- a/doc/examples/query_results.py
+++ b/doc/examples/query_results.py
@@ -29,13 +29,13 @@
 import matplotlib.pyplot as plt
 import numpy as np

-from diffpy.srmise import ModelCovariance, PDFPeakExtraction
-from diffpy.srmise.applications.plot import makeplot
+from diffpy.srmise.modelcluster import ModelCovariance
+from diffpy.srmise.pdfpeakextraction import PDFPeakExtraction


 def run(plot=True):
-    ## Initialize peak extraction
+    # Initialize peak extraction
     # Create peak extraction object
     ppe = PDFPeakExtraction()

@@ -49,23 +49,22 @@ def run(plot=True):
     ppebl.read("output/extract_single_peak.srmise")
     baseline = ppebl.extracted.baseline

-    ## Set up extraction parameters.
+    # Set up extraction parameters.
     # Peaks are extracted between 2 and 10 angstroms, using the baseline
     # from the isolated peak example.
     kwds = {}
-    kwds["rng"] = [2.0, 10.]
+    kwds["rng"] = [2.0, 10.0]
     kwds["baseline"] = baseline

     # Apply peak extraction parameters.
     ppe.setvars(**kwds)

-    ## Perform peak extraction, and retain object containing a copy of the
+    # Perform peak extraction, and retain object containing a copy of the
     # model and the full covariance matrix.
     cov = ppe.extract()

-
-    print "\n======= Accessing SrMise Results ========"
-    ## Accessing results of extraction
+    print("\n======= Accessing SrMise Results ========")
+    # Accessing results of extraction
     #
     # Model parameters are organized using a nested structure, with a list
     # of peaks each of which is a list of parameters, similar to the
@@ -90,44 +89,40 @@
     # peak. Thus, this parameter can be referenced as (1,2). Several examples
     # are presented below.

-
-    print "\n------ Parameter values and uncertainties ------"
+    print("\n------ Parameter values and uncertainties ------")
     # ModelCovariance.get() returns a (value, uncertainty) tuple for a given
     # parameter. These are the results for the nearest-neighbor peak.
-    p0 = cov.get((0,0))
-    w0 = cov.get((0,1))
-    a0 = cov.get((0,2))
-    print "Nearest-neighbor peak: "
-    print "  position = %f +/- %f" %p0
-    print "  width = %f +/- %f" %w0
-    print "  area = %f +/- %f" %a0
-    print "  Covariance(width, area) = ", cov.getcovariance((0,1),(0,2))
+    p0 = cov.get((0, 0))
+    w0 = cov.get((0, 1))
+    a0 = cov.get((0, 2))
+    print("Nearest-neighbor peak: ")
+    print("  position = %f +/- %f" % p0)
+    print("  width = %f +/- %f" % w0)
+    print("  area = %f +/- %f" % a0)
+    print("  Covariance(width, area) = ", cov.getcovariance((0, 1), (0, 2)))

     # Baseline parameters. By convention, baseline is final element in cov.
     (slope, intercept) = cov.model[-1]
-    print "\nThe linear baseline B(r)=%f*r + %f" \
-        % tuple(par for par in cov.model[-1])
+    print("\nThe linear baseline B(r)=%f*r + %f" % tuple(par for par in cov.model[-1]))

-
-    print "\n ------ Uncertainties from a Saved File --------"
+    print("\n ------ Uncertainties from a Saved File --------")
     # A .srmise file does not save the full covariance matrix, so it must be
     # recalculated when loading from these files. For example, here is the
     # nearest-neighbor peak in the file which we used to define the initial
     # baseline.
 cov2 = ModelCovariance()
     ppebl.extracted.fit(fitbaseline=True, cov=cov2, cov_format="default_output")
-    p0_saved = cov2.get((0,0))
-    w0_saved = cov2.get((0,1))
-    a0_saved = cov2.get((0,2))
-    print "Nearest-neighbor peak:"
-    print "  position = %f +/- %f" %p0_saved
-    print "  width == %f +/- %f" %w0_saved
-    print "  area = = %f +/- %f" %a0_saved
-    print "  Covariance(width, area) = ", cov2.getcovariance((0,1),(0,2))
-
-
-    print "\n ---------- Alternate Parameterizations ---------"
-    ## Different Parameterizations
+    p0_saved = cov2.get((0, 0))
+    w0_saved = cov2.get((0, 1))
+    a0_saved = cov2.get((0, 2))
+    print("Nearest-neighbor peak:")
+    print("  position = %f +/- %f" % p0_saved)
+    print("  width = %f +/- %f" % w0_saved)
+    print("  area = %f +/- %f" % a0_saved)
+    print("  Covariance(width, area) = ", cov2.getcovariance((0, 1), (0, 2)))
+
+    print("\n ---------- Alternate Parameterizations ---------")
+    # Different Parameterizations
     # Peaks and baselines may have equivalent parameterizations that are useful
     # in different situations. For example, the types defined by the
     # GaussianOverR peak function are:
@@ -151,26 +146,24 @@
     # would transform the second, third, and fourth peaks). If the keyword
     # is omitted, the transformation is attempted for all parts of the fit.
     cov.transform(in_format="pwa", out_format="mu_sigma_area", parts="peaks")
-    print "Width (sigma) of nearest-neighbor peak: %f +/- %f" %cov.get((0,1))
-
+    print("Width (sigma) of nearest-neighbor peak: %f +/- %f" % cov.get((0, 1)))

-    print "\n ------------ Highly Correlated Parameters ------------"
+    print("\n ------------ Highly Correlated Parameters ------------")
     # Highly-correlated parameters can indicate difficulties constraining the
     # fit. This function lists all pairs of parameters with an absolute value
     # of correlation which exceeds a given threshold.
-    print "|Correlation| > 0.9:"
-    print "par1  par2  corr(par1, par2)"
-    print "\n".join(str(c) for c in cov.correlationwarning(.9))
+    print("|Correlation| > 0.9:")
+    print("par1  par2  corr(par1, par2)")
+    print("\n".join(str(c) for c in cov.correlationwarning(0.9)))

-
-    print "\n-------- Estimate coordination shell occupancy ---------"
+    print("\n-------- Estimate coordination shell occupancy ---------")
     # Estimate the scale factor and its uncertainty from first peak's intensity.
     # G_normalized = scale * G_observed
     # dscale = scale * dG_observed/G_observed
-    scale = 12./a0[0]
-    dscale = scale * a0[1]/a0[0]
-    print "Estimate scale factor assuming nearest-neighbor intensity = 12"
-    print "Scale factor is %f +/- %f" %(scale, dscale)
+    scale = 12.0 / a0[0]
+    dscale = scale * a0[1] / a0[0]
+    print("Estimate scale factor assuming nearest-neighbor intensity = 12")
+    print("Scale factor is %f +/- %f" % (scale, dscale))

     # Reference for number of atoms in coordination shells for FCC.
     # http://chem-faculty.lsu.edu/watkins/MERLOT/cubic_neighbors/cubic_near_neighbors.html
@@ -178,36 +171,34 @@
     # Calculate the scaled intensities and uncertainties.
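The intensity rescaling that follows multiplies each fitted area by the estimated scale factor and propagates both uncertainties; for a product of independent quantities, the relative uncertainties add in quadrature. The same arithmetic in isolation (all numbers hypothetical):

    import numpy as np

    area, darea = 10.8, 0.4  # hypothetical fitted peak area and its uncertainty
    scale, dscale = 1.11, 0.05  # hypothetical scale factor and its uncertainty

    scaled_area = scale * area
    # Relative uncertainties combine in quadrature for a product.
    scaled_darea = scaled_area * np.sqrt((dscale / scale) ** 2 + (darea / area) ** 2)
    print("scaled area = %f +/- %f" % (scaled_area, scaled_darea))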
intensity = [] - for i in range(0, len(cov.model)-1): - (area, darea) = cov.get((i,2)) + for i in range(0, len(cov.model) - 1): + (area, darea) = cov.get((i, 2)) area *= scale - darea = area*np.sqrt((dscale/scale)**2 + (darea/area)**2) + darea = area * np.sqrt((dscale / scale) ** 2 + (darea / area) ** 2) intensity.append((ideal_intensity[i], area, darea)) - print "\nIntensity" - print "Ideal: Estimated" + print("\nIntensity") + print("Ideal: Estimated") for i in intensity: - print "%i: %f +/- %f" %i + print("%i: %f +/- %f" % i) - print "\nTotal intensity" + print("\nTotal intensity") # It is possible to iterate over peaks directly without using indices. # In addition, peak parameters can be accessed using string keys. For the # Gaussian over r all of "position", "width", and "area" are valid. total_observed_intensity = 0 total_ideal_intensity = 0 for peak, ii in zip(cov.model[:-1], ideal_intensity): - total_observed_intensity += scale*peak["area"] + total_observed_intensity += scale * peak["area"] total_ideal_intensity += ii - print "Ideal: Observed (using estimated scale factor)" - print "%i: %f" %(total_ideal_intensity, total_observed_intensity) - + print("Ideal: Observed (using estimated scale factor)") + print("%i: %f" % (total_ideal_intensity, total_observed_intensity)) - ## Save output + # Save output ppe.write("output/query_results.srmise") ppe.writepwa("output/query_results.pwa") - - ## Evaluating a model. + # Evaluating a model. # Although the ModelCovariance object is useful, the model used for fitting # can be directly accessed through PDFPeakExtraction as well, albeit # without uncertainties. This is particularly helpful when evaluating a @@ -217,14 +208,15 @@ def run(plot=True): # peaks are kept separate. if plot: plt.figure() - grid = np.arange(2, 10, .01) + grid = np.arange(2, 10, 0.01) bl = ppe.extracted.baseline everysecondpeak = ppe.extracted.model[::2] - plt.plot(ppe.x, ppe.y, 'o') + plt.plot(ppe.x, ppe.y, "o") for peak in everysecondpeak: plt.plot(grid, bl.value(grid) + peak.value(grid)) plt.xlim(2, 10) plt.show() -if __name__ == '__main__': + +if __name__ == "__main__": run() diff --git a/doc/make.bat b/doc/make.bat new file mode 100644 index 0000000..2be8306 --- /dev/null +++ b/doc/make.bat @@ -0,0 +1,36 @@ +@ECHO OFF + +pushd %~dp0 + +REM Command file for Sphinx documentation + +if "%SPHINXBUILD%" == "" ( + set SPHINXBUILD=sphinx-build +) +set SOURCEDIR=source +set BUILDDIR=build +set SPHINXPROJ=PackagingScientificPython + +if "%1" == "" goto help + +%SPHINXBUILD% >NUL 2>NUL +if errorlevel 9009 ( + echo. + echo.The 'sphinx-build' command was not found. Make sure you have Sphinx + echo.installed, then set the SPHINXBUILD environment variable to point + echo.to the full path of the 'sphinx-build' executable. Alternatively you + echo.may add the Sphinx directory to PATH. + echo. + echo.If you don't have Sphinx installed, grab it from + echo.http://sphinx-doc.org/ + exit /b 1 +) + +%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% +goto end + +:help +%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% + +:end +popd diff --git a/doc/manual/source/conf.py b/doc/manual/source/conf.py index 965010c..d856aef 100644 --- a/doc/manual/source/conf.py +++ b/doc/manual/source/conf.py @@ -16,6 +16,8 @@ import sys import time +from setup import versiondata + # If extensions (or modules to document with autodoc) are in another directory, # add these directories to sys.path here. 
If the directory is relative to the # documentation root, use os.path.abspath to make it absolute, like shown here. @@ -59,7 +61,6 @@ # |version| and |release|, also used in various other places throughout the # built documents. sys.path.insert(0, os.path.abspath("../../..")) -from setup import versiondata fullversion = versiondata.get("DEFAULT", "version") # The short X.Y version. @@ -205,11 +206,11 @@ latex_elements = { # The paper size ('letterpaper' or 'a4paper'). - #'papersize': 'letterpaper', + # 'papersize': 'letterpaper', # The font size ('10pt', '11pt' or '12pt'). - #'pointsize': '10pt', + # 'pointsize': '10pt', # Additional stuff for the LaTeX preamble. - #'preamble': '', + # 'preamble': '', } # Grouping the document tree into LaTeX files. List of tuples diff --git a/doc/source/_static/.placeholder b/doc/source/_static/.placeholder new file mode 100644 index 0000000..e69de29 diff --git a/doc/source/api/diffpy.srmise.applications.rst b/doc/source/api/diffpy.srmise.applications.rst new file mode 100644 index 0000000..87413c9 --- /dev/null +++ b/doc/source/api/diffpy.srmise.applications.rst @@ -0,0 +1,28 @@ +:tocdepth: -1 + +diffpy.srmise.applications package +================================== + +.. automodule:: diffpy.srmise.applications + :members: + :undoc-members: + :show-inheritance: + +Submodules +---------- + +diffpy.srmise.applications.plot module +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +.. automodule:: diffpy.srmise.applications.plot + :members: + :undoc-members: + :show-inheritance: + +diffpy.srmise.applications.extract module +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +.. automodule:: diffpy.srmise.applications.extract + :members: + :undoc-members: + :show-inheritance: diff --git a/doc/source/api/diffpy.srmise.baselines.rst b/doc/source/api/diffpy.srmise.baselines.rst new file mode 100644 index 0000000..5e9b791 --- /dev/null +++ b/doc/source/api/diffpy.srmise.baselines.rst @@ -0,0 +1,52 @@ +:tocdepth: -1 + +diffpy.srmise.baselines package +=============================== + +.. automodule:: diffpy.srmise.baselines + :members: + :undoc-members: + :show-inheritance: + +Submodules +---------- + +diffpy.srmise.baselines.nanospherical module +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +.. automodule:: diffpy.srmise.baselines.nanospherical + :members: + :undoc-members: + :show-inheritance: + +diffpy.srmise.baselines.arbitrary module +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +.. automodule:: diffpy.srmise.baselines.arbitrary + :members: + :undoc-members: + :show-inheritance: + +diffpy.srmise.baselines.fromsequence module +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +.. automodule:: diffpy.srmise.baselines.fromsequence + :members: + :undoc-members: + :show-inheritance: + +diffpy.srmise.baselines.polynomial module +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +.. automodule:: diffpy.srmise.baselines.polynomial + :members: + :undoc-members: + :show-inheritance: + +diffpy.srmise.baselines.base module +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +.. automodule:: diffpy.srmise.baselines.base + :members: + :undoc-members: + :show-inheritance: diff --git a/doc/source/api/diffpy.srmise.modelevaluators.rst b/doc/source/api/diffpy.srmise.modelevaluators.rst new file mode 100644 index 0000000..012c60a --- /dev/null +++ b/doc/source/api/diffpy.srmise.modelevaluators.rst @@ -0,0 +1,36 @@ +:tocdepth: -1 + +diffpy.srmise.modelevaluators package +===================================== + +.. 
automodule:: diffpy.srmise.modelevaluators + :members: + :undoc-members: + :show-inheritance: + +Submodules +---------- + +diffpy.srmise.modelevaluators.aic module +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +.. automodule:: diffpy.srmise.modelevaluators.aic + :members: + :undoc-members: + :show-inheritance: + +diffpy.srmise.modelevaluators.aicc module +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +.. automodule:: diffpy.srmise.modelevaluators.aicc + :members: + :undoc-members: + :show-inheritance: + +diffpy.srmise.modelevaluators.base module +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +.. automodule:: diffpy.srmise.modelevaluators.base + :members: + :undoc-members: + :show-inheritance: diff --git a/doc/source/api/diffpy.srmise.peaks.rst b/doc/source/api/diffpy.srmise.peaks.rst new file mode 100644 index 0000000..b88e831 --- /dev/null +++ b/doc/source/api/diffpy.srmise.peaks.rst @@ -0,0 +1,44 @@ +:tocdepth: -1 + +diffpy.srmise.peaks package +=========================== + +.. automodule:: diffpy.srmise.peaks + :members: + :undoc-members: + :show-inheritance: + +Submodules +---------- + +diffpy.srmise.peaks.gaussianoverr module +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +.. automodule:: diffpy.srmise.peaks.gaussianoverr + :members: + :undoc-members: + :show-inheritance: + +diffpy.srmise.peaks.terminationripples module +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +.. automodule:: diffpy.srmise.peaks.terminationripples + :members: + :undoc-members: + :show-inheritance: + +diffpy.srmise.peaks.base module +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +.. automodule:: diffpy.srmise.peaks.base + :members: + :undoc-members: + :show-inheritance: + +diffpy.srmise.peaks.gaussian module +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +.. automodule:: diffpy.srmise.peaks.gaussian + :members: + :undoc-members: + :show-inheritance: diff --git a/doc/source/api/diffpy.srmise.rst b/doc/source/api/diffpy.srmise.rst new file mode 100644 index 0000000..98ea790 --- /dev/null +++ b/doc/source/api/diffpy.srmise.rst @@ -0,0 +1,111 @@ +:tocdepth: -1 + +diffpy.srmise package +===================== + +.. automodule:: diffpy.srmise + :members: + :undoc-members: + :show-inheritance: + +Subpackages +----------- + +.. toctree:: + :titlesonly: + + diffpy.srmise.peaks + diffpy.srmise.modelevaluators + diffpy.srmise.applications + diffpy.srmise.baselines + +Submodules +---------- + +diffpy.srmise.multimodelselection module +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +.. automodule:: diffpy.srmise.multimodelselection + :members: + :undoc-members: + :show-inheritance: + +diffpy.srmise.srmiselog module +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +.. automodule:: diffpy.srmise.srmiselog + :members: + :undoc-members: + :show-inheritance: + +diffpy.srmise.modelparts module +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +.. automodule:: diffpy.srmise.modelparts + :members: + :undoc-members: + :show-inheritance: + +diffpy.srmise.peakstability module +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +.. automodule:: diffpy.srmise.peakstability + :members: + :undoc-members: + :show-inheritance: + +diffpy.srmise.basefunction module +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +.. automodule:: diffpy.srmise.basefunction + :members: + :undoc-members: + :show-inheritance: + +diffpy.srmise.pdfpeakextraction module +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +.. automodule:: diffpy.srmise.pdfpeakextraction + :members: + :undoc-members: + :show-inheritance: + +diffpy.srmise.modelcluster module +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +.. 
automodule:: diffpy.srmise.modelcluster + :members: + :undoc-members: + :show-inheritance: + +diffpy.srmise.dataclusters module +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +.. automodule:: diffpy.srmise.dataclusters + :members: + :undoc-members: + :show-inheritance: + +diffpy.srmise.srmiseerrors module +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +.. automodule:: diffpy.srmise.srmiseerrors + :members: + :undoc-members: + :show-inheritance: + +diffpy.srmise.peakextraction module +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +.. automodule:: diffpy.srmise.peakextraction + :members: + :undoc-members: + :show-inheritance: + +diffpy.srmise.pdfdataset module +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +.. automodule:: diffpy.srmise.pdfdataset + :members: + :undoc-members: + :show-inheritance: diff --git a/doc/source/conf.py b/doc/source/conf.py new file mode 100644 index 0000000..65d3685 --- /dev/null +++ b/doc/source/conf.py @@ -0,0 +1,289 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# +# diffpy.srmise documentation build configuration file, created by +# sphinx-quickstart on Thu Jan 30 15:49:41 2014. +# +# This file is execfile()d with the current directory set to its +# containing dir. +# +# Note that not all possible configuration values are present in this +# autogenerated file. +# +# All configuration values have a default; values that are commented out +# serve to show the default. + +import sys +import time +from importlib.metadata import version +from pathlib import Path + +# If extensions (or modules to document with autodoc) are in another directory, +# add these directories to sys.path here. If the directory is relative to the +# documentation root, use Path().resolve() to make it absolute, like shown here. +# sys.path.insert(0, str(Path(".").resolve())) +sys.path.insert(0, str(Path("../..").resolve())) +sys.path.insert(0, str(Path("../../src").resolve())) + +# abbreviations +ab_authors = "Billinge Group members and community contributors" + +# -- General configuration ------------------------------------------------ + +# If your documentation needs a minimal Sphinx version, state it here. +# needs_sphinx = '1.0' + +# Add any Sphinx extension module names here, as strings. They can be +# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom +# ones. +extensions = [ + "sphinx.ext.autodoc", + "sphinx.ext.napoleon", + "sphinx.ext.todo", + "sphinx.ext.viewcode", + "sphinx.ext.intersphinx", + "sphinx_rtd_theme", + "m2r", +] + +# Add any paths that contain templates here, relative to this directory. +templates_path = ["_templates"] + +# The suffix(es) of source filenames. +# You can specify multiple suffix as a list of string: +# +source_suffix = [".rst", ".md"] + +# The encoding of source files. +# source_encoding = 'utf-8-sig' + +# The master toctree document. +master_doc = "index" + +# General information about the project. +project = "diffpy.srmise" +copyright = "%Y, The Trustees of Columbia University in the City of New York" + +# The version info for the project you're documenting, acts as replacement for +# |version| and |release|, also used in various other places throughout the +# built documents. + +fullversion = version(project) +# The short X.Y version. +version = "".join(fullversion.split(".post")[:1]) +# The full version, including alpha/beta/rc tags. +release = fullversion + +# The language for content autogenerated by Sphinx. Refer to documentation +# for a list of supported languages. 
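For orientation, the short-version line above, version = "".join(fullversion.split(".post")[:1]), simply strips any ".post" suffix that setuptools-git-versioning may append. A minimal sketch of what it computes, using a hypothetical version string (illustration only, not part of the patch):

    fullversion = "0.5.2.post14"  # hypothetical value from importlib.metadata.version
    version = "".join(fullversion.split(".post")[:1])  # short version -> "0.5.2"
    release = fullversion  # full version, keeps the suffix -> "0.5.2.post14"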
+# language = None + +# There are two options for replacing |today|: either, you set today to some +# non-false value, then it is used: +# today = '' +today = time.strftime("%B %d, %Y", time.localtime()) +year = today.split()[-1] +# Else, today_fmt is used as the format for a strftime call. +# today_fmt = '%B %d, %Y' +# substitute YEAR in the copyright string +copyright = copyright.replace("%Y", year) + +# List of patterns, relative to source directory, that match files and +# directories to ignore when looking for source files. +exclude_patterns = ["build"] + +# The reST default role (used for this markup: `text`) to use for all +# documents. +# default_role = None + +# If true, '()' will be appended to :func: etc. cross-reference text. +# add_function_parentheses = True + +# If true, the current module name will be prepended to all description +# unit titles (such as .. function::). +# add_module_names = True + +# If true, sectionauthor and moduleauthor directives will be shown in the +# output. They are ignored by default. +# show_authors = False + +# The name of the Pygments (syntax highlighting) style to use. +pygments_style = "sphinx" + +# A list of ignored prefixes for module index sorting. +modindex_common_prefix = ["diffpy.srmise"] + +# Display all warnings for missing links. +nitpicky = True + +# -- Options for HTML output ---------------------------------------------- + +# The theme to use for HTML and HTML Help pages. See the documentation for +# a list of builtin themes. +# +html_theme = "sphinx_rtd_theme" + +# Theme options are theme-specific and customize the look and feel of a theme +# further. For a list of options available for each theme, see the +# documentation. +# +html_theme_options = { + "navigation_with_keys": "true", +} + +# Add any paths that contain custom themes here, relative to this directory. +# html_theme_path = [] + +# The name for this set of Sphinx documents. If None, it defaults to +# " v documentation". +# html_title = None + +# A shorter title for the navigation bar. Default is the same as html_title. +# html_short_title = None + +# The name of an image file (relative to this directory) to place at the top +# of the sidebar. +# html_logo = None + +# The name of an image file (within the static path) to use as favicon of the +# docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32 +# pixels large. +# html_favicon = None + +# Add any paths that contain custom static files (such as style sheets) here, +# relative to this directory. They are copied after the builtin static files, +# so a file named "default.css" will overwrite the builtin "default.css". +# html_static_path = ['_static'] + +# Add any extra paths that contain custom files (such as robots.txt or +# .htaccess) here, relative to this directory. These files are copied +# directly to the root of the documentation. +# html_extra_path = [] + +# If not '', a 'Last updated on:' timestamp is inserted at every page bottom, +# using the given strftime format. +# html_last_updated_fmt = '%b %d, %Y' + +# If true, SmartyPants will be used to convert quotes and dashes to +# typographically correct entities. +# html_use_smartypants = True + +# Custom sidebar templates, maps document names to template names. +# html_sidebars = {} + +# Additional templates that should be rendered to pages, maps page names to +# template names. +# html_additional_pages = {} + +# If false, no module index is generated. +# html_domain_indices = True + +# If false, no index is generated. 
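Note that the copyright string defined earlier carries a literal "%Y" placeholder, which the today/year lines above substitute at build time. A minimal illustration with a hypothetical build date (not part of the patch):

    copyright = "%Y, The Trustees of Columbia University in the City of New York"
    today = "June 05, 2024"  # hypothetical result of time.strftime("%B %d, %Y", ...)
    year = today.split()[-1]  # -> "2024"
    copyright = copyright.replace("%Y", year)  # -> "2024, The Trustees of ..."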
+# html_use_index = True + +# If true, the index is split into individual pages for each letter. +# html_split_index = False + +# If true, links to the reST sources are added to the pages. +# html_show_sourcelink = True + +# If true, "Created using Sphinx" is shown in the HTML footer. Default is True. +# html_show_sphinx = True + +# If true, "(C) Copyright ..." is shown in the HTML footer. Default is True. +# html_show_copyright = True + +# If true, an OpenSearch description file will be output, and all pages will +# contain a tag referring to it. The value of this option must be the +# base URL from which the finished HTML is served. +# html_use_opensearch = '' + +# This is the file name suffix for HTML files (e.g. ".xhtml"). +# html_file_suffix = None + +# Output file base name for HTML help builder. +basename = "diffpy.srmise".replace(" ", "").replace(".", "") +htmlhelp_basename = basename + "doc" + + +# -- Options for LaTeX output --------------------------------------------- + +latex_elements = { + # The paper size ('letterpaper' or 'a4paper'). + # 'papersize': 'letterpaper', + # The font size ('10pt', '11pt' or '12pt'). + # 'pointsize': '10pt', + # Additional stuff for the LaTeX preamble. + # 'preamble': '', +} + +# Grouping the document tree into LaTeX files. List of tuples +# (source start file, target name, title, +# author, documentclass [howto, manual, or own class]). +latex_documents = [ + ("index", "diffpy.srmise.tex", "diffpy.srmise Documentation", ab_authors, "manual"), +] + +# The name of an image file (relative to this directory) to place at the top of +# the title page. +# latex_logo = None + +# For "manual" documents, if this is true, then toplevel headings are parts, +# not chapters. +# latex_use_parts = False + +# If true, show page references after internal links. +# latex_show_pagerefs = False + +# If true, show URL addresses after external links. +# latex_show_urls = False + +# Documents to append as an appendix to all manuals. +# latex_appendices = [] + +# If false, no module index is generated. +# latex_domain_indices = True + + +# -- Options for manual page output --------------------------------------- + +# One entry per manual page. List of tuples +# (source start file, name, description, authors, manual section). +man_pages = [("index", "diffpy.srmise", "diffpy.srmise Documentation", ab_authors, 1)] + +# If true, show URL addresses after external links. +# man_show_urls = False + + +# -- Options for Texinfo output ------------------------------------------- + +# Grouping the document tree into Texinfo files. List of tuples +# (source start file, target name, title, author, +# dir menu entry, description, category) +texinfo_documents = [ + ( + "index", + "diffpy.srmise", + "diffpy.srmise Documentation", + ab_authors, + "diffpy.srmise", + "One line description of project.", + "Miscellaneous", + ), +] + +# Documents to append as an appendix to all manuals. +# texinfo_appendices = [] + +# If false, no module index is generated. +# texinfo_domain_indices = True + +# How to display URL addresses: 'footnote', 'no', or 'inline'. +# texinfo_show_urls = 'footnote' + +# If true, do not generate a @detailmenu in the "Top" node's menu. +# texinfo_no_detailmenu = False + + +# Example configuration for intersphinx: refer to the Python standard library. 
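If intersphinx were enabled here, the mapping could look like the sketch below; the inventory URLs are assumptions for illustration, and this conf.py deliberately leaves the setting commented out:

    intersphinx_mapping = {
        "python": ("https://docs.python.org/3", None),
        "numpy": ("https://numpy.org/doc/stable/", None),
    }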
+# intersphinx_mapping = {'http://docs.python.org/': None} diff --git a/doc/source/index.rst b/doc/source/index.rst new file mode 100644 index 0000000..89fa460 --- /dev/null +++ b/doc/source/index.rst @@ -0,0 +1,44 @@ +####### +|title| +####### + +.. |title| replace:: diffpy.srmise documentation + +diffpy.srmise - Peak extraction and peak fitting tool for atomic pair distribution functions. + +| Software version |release|. +| Last updated |today|. + +======= +Authors +======= + +diffpy.srmise is developed by Billinge Group +and its community contributors. + +For a detailed list of contributors see +https://github.com/diffpy/diffpy.srmise/graphs/contributors. + +============ +Installation +============ + +See the `README `_ +file included with the distribution. + +================= +Table of contents +================= +.. toctree:: + :titlesonly: + + license + release + Package API + +======= +Indices +======= + +* :ref:`genindex` +* :ref:`search` diff --git a/doc/source/license.rst b/doc/source/license.rst new file mode 100644 index 0000000..cfab61c --- /dev/null +++ b/doc/source/license.rst @@ -0,0 +1,39 @@ +:tocdepth: -1 + +.. index:: license + +License +####### + +OPEN SOURCE LICENSE AGREEMENT +============================= +BSD 3-Clause License + +Copyright (c) 2024, The Trustees of Columbia University in +the City of New York. +All Rights Reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +1. Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/doc/source/release.rst b/doc/source/release.rst new file mode 100644 index 0000000..27cd0cc --- /dev/null +++ b/doc/source/release.rst @@ -0,0 +1,5 @@ +:tocdepth: -1 + +.. index:: release notes + +..
include:: ../../CHANGELOG.rst diff --git a/news/TEMPLATE.rst b/news/TEMPLATE.rst new file mode 100644 index 0000000..790d30b --- /dev/null +++ b/news/TEMPLATE.rst @@ -0,0 +1,23 @@ +**Added:** + +* + +**Changed:** + +* + +**Deprecated:** + +* + +**Removed:** + +* + +**Fixed:** + +* + +**Security:** + +* diff --git a/news/cookiecut.rst b/news/cookiecut.rst new file mode 100644 index 0000000..25b8176 --- /dev/null +++ b/news/cookiecut.rst @@ -0,0 +1,23 @@ +**Added:** + +* + +**Changed:** + +* + +**Deprecated:** + +* + +**Removed:** + +* + +**Fixed:** + +* Cookiecuttered diffpy.srmise to new Billingegroup standard. + +**Security:** + +* diff --git a/news/cookiecutter.rst b/news/cookiecutter.rst new file mode 100644 index 0000000..64626aa --- /dev/null +++ b/news/cookiecutter.rst @@ -0,0 +1,23 @@ +**Added:** + +* + +**Changed:** + +* Moved diffpy.srmise from python2 to python3. + +**Deprecated:** + +* + +**Removed:** + +* Removed travis.yml and other useless files + +**Fixed:** + +* Fixed numpy format boolean counting, numpy int slicing error. + +**Security:** + +* diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..3c8f23b --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,75 @@ +[build-system] +requires = ["setuptools>=62.0", "setuptools-git-versioning<2"] +build-backend = "setuptools.build_meta" + +[project] +name = "diffpy.srmise" +dynamic=['version'] +authors = [ + { name="Simon J.L. Billinge group", email="simon.billinge@gmail.com" }, + {name="Luke Granlund", email="granlund@pa.msu.edu"}, +] +maintainers = [ + { name="Simon J.L. Billinge group", email="simon.billinge@gmail.com" }, +] +description = "Peak extraction and peak fitting tool for atomic pair distribution functions." +keywords = ['peak extraction fitting PDF AIC multimodeling'] +readme = "README.rst" +requires-python = ">=3.10" +classifiers = [ + 'Development Status :: 3 - Alpha', + 'Environment :: Console', + 'Intended Audience :: Developers', + 'Intended Audience :: Science/Research', + 'Intended Audience :: Education', + 'License :: OSI Approved :: BSD License', + 'Operating System :: MacOS :: MacOS X', + 'Operating System :: Microsoft :: Windows', + 'Operating System :: POSIX', + 'Operating System :: Unix', + 'Programming Language :: Python :: 3.10', + 'Programming Language :: Python :: 3.11', + 'Programming Language :: Python :: 3.12', + 'Topic :: Scientific/Engineering :: Physics', + 'Topic :: Scientific/Engineering :: Chemistry', + 'Topic :: Software Development :: Libraries', +] + +[project.urls] +Homepage = "https://github.com/diffpy/diffpy.srmise/" +Issues = "https://github.com/diffpy/diffpy.srmise/issues/" + +[tool.setuptools-git-versioning] +enabled = true +template = "{tag}" +dev_template = "{tag}" +dirty_template = "{tag}" + +[tool.setuptools.packages.find] +where = ["src"] # list of folders that contain the packages (["."] by default) +include = ["*"] # package names should match these glob patterns (["*"] by default) +exclude = ["diffpy.srmise.tests*"] # exclude packages matching these glob patterns (empty by default) +namespaces = false # to disable scanning PEP 420 namespaces (true by default) + +[tool.black] +line-length = 115 +include = '\.pyi?$' +exclude = ''' +/( + \.git + | \.hg + | \.mypy_cache + | \.tox + | \.venv + | \.rst + | \.txt + | _build + | buck-out + | build + | dist + + # The following are specific to Black, you probably don't want those. 
+ | blib2to3 + | tests/data +)/ +''' diff --git a/requirements/build.txt b/requirements/build.txt new file mode 100644 index 0000000..f72d870 --- /dev/null +++ b/requirements/build.txt @@ -0,0 +1,2 @@ +python +setuptools diff --git a/requirements/docs.txt b/requirements/docs.txt new file mode 100644 index 0000000..ab17b1c --- /dev/null +++ b/requirements/docs.txt @@ -0,0 +1,4 @@ +sphinx +sphinx_rtd_theme +doctr +m2r diff --git a/requirements/pip.txt b/requirements/pip.txt new file mode 100644 index 0000000..e69de29 diff --git a/requirements/run.txt b/requirements/run.txt new file mode 100644 index 0000000..1b57b14 --- /dev/null +++ b/requirements/run.txt @@ -0,0 +1,3 @@ +numpy +scipy +matplotlib-base diff --git a/requirements/test.txt b/requirements/test.txt new file mode 100644 index 0000000..6f9ccf8 --- /dev/null +++ b/requirements/test.txt @@ -0,0 +1,5 @@ +flake8 +pytest +codecov +coverage +pytest-env diff --git a/setup.py b/setup.py deleted file mode 100755 index e0e9b14..0000000 --- a/setup.py +++ /dev/null @@ -1,108 +0,0 @@ -#!/usr/bin/env python - -# Installation script for diffpy.srmise - -"""diffpy.srmise - Peak extraction/fitting tool for pair distribution functions - -Packages: diffpy.srmise -""" - -import os - -from setuptools import find_packages, setup - -# versioncfgfile holds version data for git commit hash and date. -# It must reside in the same directory as version.py. -MYDIR = os.path.dirname(os.path.abspath(__file__)) -versioncfgfile = os.path.join(MYDIR, "diffpy/srmise/version.cfg") - - -def gitinfo(): - from subprocess import PIPE, Popen - - kw = dict(stdout=PIPE, cwd=MYDIR) - proc = Popen(["git", "describe", "--match=v[[:digit:]]*"], **kw) - desc = proc.stdout.read() - proc = Popen(["git", "log", "-1", "--format=%H %at %ai"], **kw) - glog = proc.stdout.read() - rv = {} - rv["version"] = "-".join(desc.strip().split("-")[:2]).lstrip("v") - rv["commit"], rv["timestamp"], rv["date"] = glog.strip().split(None, 2) - return rv - - -def getversioncfg(): - from ConfigParser import SafeConfigParser - - cp = SafeConfigParser() - cp.read(versioncfgfile) - gitdir = os.path.join(MYDIR, ".git") - if not os.path.isdir(gitdir): - return cp - try: - g = gitinfo() - except OSError: - return cp - d = cp.defaults() - if g["version"] != d.get("version") or g["commit"] != d.get("commit"): - cp.set("DEFAULT", "version", g["version"]) - cp.set("DEFAULT", "commit", g["commit"]) - cp.set("DEFAULT", "date", g["date"]) - cp.set("DEFAULT", "timestamp", g["timestamp"]) - cp.write(open(versioncfgfile, "w")) - return cp - - -versiondata = getversioncfg() - -# define distribution, but make this module importable -setup_args = dict( - name="diffpy.srmise", - version=versiondata.get("DEFAULT", "version"), - namespace_packages=["diffpy"], - packages=find_packages(), - include_package_data=True, - zip_safe=False, - # Dependencies - # numpy - # scipy - # matplotlib >= 1.1.0 - install_requires=["matplotlib >= 1.1.0", "numpy", "scipy"], - # other arguments here... - entry_points={ - "console_scripts": [ - "srmise = diffpy.srmise.applications.extract:main", - "srmiseplot = diffpy.srmise.applications.plot:main", - ] - }, - author="Luke Granlund", - author_email="luke.r.granlund@gmail.com", - description=( - "Peak extraction and peak fitting tool for atomic " - "pair distribution functions." 
- ), - license="BSD-style license", - url="https://github.com/diffpy/diffpy.srmise/", - keywords="peak extraction fitting PDF AIC multimodeling", - classifiers=[ - # List of possible values at - # http://pypi.python.org/pypi?:action=list_classifiers - "Development Status :: 3 - Alpha", - "Environment :: Console", - "Intended Audience :: Developers", - "Intended Audience :: Education", - "Intended Audience :: Science/Research", - "License :: OSI Approved :: BSD License", - "Operating System :: MacOS", - "Operating System :: POSIX", - "Operating System :: Microsoft :: Windows", - "Programming Language :: Python :: 2.6", - "Programming Language :: Python :: 2.7", - "Topic :: Scientific/Engineering :: Chemistry", - "Topic :: Scientific/Engineering :: Physics", - "Topic :: Software Development :: Libraries", - ], -) - -if __name__ == "__main__": - setup(**setup_args) diff --git a/src/diffpy/__init__.py b/src/diffpy/__init__.py new file mode 100644 index 0000000..377a0f9 --- /dev/null +++ b/src/diffpy/__init__.py @@ -0,0 +1,23 @@ +#!/usr/bin/env python +############################################################################## +# +# (c) 2024 The Trustees of Columbia University in the City of New York. +# All rights reserved. +# +# File coded by: Billinge Group members and community contributors. +# +# See GitHub contributions for a more detailed list of contributors. +# https://github.com/diffpy/diffpy.srmise/graphs/contributors +# +# See LICENSE.rst for license information. +# +############################################################################## + +"""Blank namespace package for module diffpy.""" + + +from pkgutil import extend_path + +__path__ = extend_path(__path__, __name__) + +# End of file diff --git a/src/diffpy/srmise/__init__.py b/src/diffpy/srmise/__init__.py new file mode 100644 index 0000000..65eb42b --- /dev/null +++ b/src/diffpy/srmise/__init__.py @@ -0,0 +1,24 @@ +#!/usr/bin/env python +############################################################################## +# +# (c) 2024 The Trustees of Columbia University in the City of New York. +# All rights reserved. +# +# File coded by: Billinge Group members and community contributors. +# +# See GitHub contributions for a more detailed list of contributors. +# https://github.com/diffpy/diffpy.srmise/graphs/contributors +# +# See LICENSE.rst for license information. +# +############################################################################## + +"""Peak extraction and peak fitting tool for atomic pair distribution functions.""" + +# package version +from diffpy.srmise.version import __version__ + +# silence the pyflakes syntax checker
assert __version__ or True + +# End of file diff --git a/diffpy/srmise/modelevaluators/__init__.py b/src/diffpy/srmise/applications/__init__.py similarity index 81% rename from diffpy/srmise/modelevaluators/__init__.py rename to src/diffpy/srmise/applications/__init__.py index 4cda42c..5540acb 100644 --- a/diffpy/srmise/modelevaluators/__init__.py +++ b/src/diffpy/srmise/applications/__init__.py @@ -2,7 +2,8 @@ ############################################################################## # # SrMise by Luke Granlund -# (c) 2014 trustees of the Michigan State University. +# (c) 2014 trustees of the Michigan State University +# (c) 2024 trustees of Columbia University in the City of New York # All rights reserved. # # File coded by: Luke Granlund @@ -10,8 +11,3 @@ # See LICENSE.txt for license information.
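The two import lines removed just below (from aic import AIC, from aicc import AICc) are Python 2 implicit relative imports; under Python 3, where imports are absolute by default, they would be spelled as the absolute imports sketched here. The new package simply drops these re-exports, so the sketch is for reference only:

    # Reference only: Python 3 absolute-import equivalents of the removed
    # Python 2 lines (the new package does not re-export these names).
    from diffpy.srmise.modelevaluators.aic import AIC
    from diffpy.srmise.modelevaluators.aicc import AICc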
# ############################################################################## - -__all__ = ["base", "aic", "aicc"] - -from aic import AIC -from aicc import AICc diff --git a/src/diffpy/srmise/applications/extract.py b/src/diffpy/srmise/applications/extract.py new file mode 100755 index 0000000..9d08e9f --- /dev/null +++ b/src/diffpy/srmise/applications/extract.py @@ -0,0 +1,647 @@ +#!/usr/bin/env python +############################################################################## +# +# SrMise by Luke Granlund +# (c) 2014-2015 trustees of the Michigan State University. +# All rights reserved. +# +# File coded by: Luke Granlund +# +# See LICENSE.txt for license information. +# +############################################################################## + +import textwrap +from optparse import IndentedHelpFormatter, OptionGroup, OptionParser + +import matplotlib.pyplot as plt +import numpy as np + + +def main(): + """Default SrMise entry-point.""" + + usage = ( + "usage: %prog pdf_file [options]\n" + "pdf_file is a file containing a PDF (accepts several " + "common formats), or a .srmise file." + ) + + from diffpy.srmise import __version__ + + version = "diffpy.srmise " + __version__ + + descr = ( + "The SrMise package is a tool to aid extracting and fitting peaks " + "that comprise a pair distribution function. This script exposes " + "basic peak extraction functionality. For many PDFs it is " + "sufficient to specify the range, baseline, and sometimes an ad " + "hoc uncertainty. See the discussion of these options below for " + "further guidance." + ) + + epilog = ( + "Options set above override those from an existing .srmise " + "file, as well as the usual defaults summarized here.\n\n" + "Defaults (when qmax > 0)\n" + "------------------------\n" + "baseline - None (identically 0).\n" + "dg - The uncertainty reported in the PDF (if any), otherwise " + "5% of maximum value of PDF.\n" + "nyquist - True\n" + "range - All the data\n" + "cres - The Nyquist rate.\n" + "supersample - 4.0\n" + "scale - (Deprecated) False\n\n" + "Defaults (when qmax = 0)\n" + "------------------------\n" + "baseline - as above\n" + "dg - as above\n" + "nyquist - False (and no effect if True)\n" + "range - as above\n" + "cres - Four times the average distance between data points\n" + "supersample - Parameter has no effect.\n" + "scale - (Deprecated) False, and no effect if True\n\n" + "Known issues\n" + "------------\n" + "1) Peak extraction works best when the data are moderately " + "oversampled first. When qmax > 0 this is handled " + "automatically, but when qmax = 0 no resampling of any kind is " + "performed.\n" + "2) Peak extraction performed on a PDF file and a .srmise file " + "derived from that data with identical extraction parameters " + "can give different results even on the same platform. This is " + "because the original data may undergo some processing before it " + "can be saved by SrMise. For consistent results, always specify " + "the original PDF, or always load the PDF from a .srmise file " + "you save before performing any peak extraction on that data.\n" + "3) Liveplotting depends on the matplotlib backend, and doesn't " + "implement an idle handler, so interaction with its window will " + "likely cause a freeze." + ) + + # TODO: Move to argparse (though not in 2.6 by default) to handle + # variable-length options without callbacks. Longterm, the major + # value is using the same option to specify a baseline that should + # use estimation vs. 
one that should use explicitly provided pars. + parser = OptionParser( + usage=usage, + description=descr, + epilog=epilog, + version=version, + formatter=IndentedHelpFormatterWithNL(), + ) + + parser.set_defaults( + plot=False, + liveplot=False, + wait=False, + performextraction=True, + verbosity="warning", + ) + dg_defaults = { + "absolute": None, + "data": None, + "max-fraction": 0.05, + "ptp-fraction": 0.05, + "dG-fraction": 1.0, + } + + parser.add_option( + "--extract", + action="store_true", + dest="performextraction", + help="[Default] Perform extraction.", + ) + parser.add_option( + "--no-extract", + action="store_false", + dest="performextraction", + help="Do not perform extraction.", + ) + parser.add_option( + "--range", + nargs=2, + dest="rng", + type="float", + metavar="rmin rmax", + help="Extract over the range (rmin, rmax).", + ) + parser.add_option( + "--qmax", + dest="qmax", + type="string", + metavar="QMAX", + help="Model peaks with this maximum q value.", + ) + parser.add_option( + "--nyquist", + action="store_true", + dest="nyquist", + help="Use Nyquist resampling if qmax > 0.", + ) + parser.add_option( + "--no-nyquist", + action="store_false", + dest="nyquist", + help="Do not use Nyquist resampling.", + ) + parser.add_option( + "--pf", + dest="peakfunction", + metavar="PF", + help="Fit peak function PF defined in " "diffpy.srmise.peaks, e.g. " "'GaussianOverR(maxwidth=0.7)'", + ) + parser.add_option( + "--cres", + dest="cres", + type="float", + metavar="cres", + help="Clustering resolution.", + ) + parser.add_option( + "--supersample", + dest="supersample", + type="float", + metavar="SS", + help="Minimum initial oversampling rate as multiple of " "Nyquist rate.", + ) + parser.add_option( + "--me", + "-m", + dest="modelevaluator", + metavar="ME", + help="ModelEvaluator defined in " "diffpy.srmise.modelevaluators, e.g. 'AIC'", + ) + + group = OptionGroup( + parser, + "Baseline Options", + "SrMise cannot determine the appropriate type of " + "baseline (e.g. crystalline vs. some nanoparticle) " + "solely from the data, so the user should specify the " + "appropriate type and/or parameters. (Default is " + "identically 0, which is unphysical.) SrMise keeps the " + "PDF baseline fixed at its initial value until the " + "final stages of peak extraction, so results are " + "frequently conditioned on that choice. (See the " + "SrMise documentation for details.) A good estimate " + "is therefore important for best results. SrMise can " + "estimate initial parameters from the data for linear " + "baselines in some situations (all peaks are positive, " + "and the degree of overlap in the region of extraction " + "is not too great), but in most cases it is best to " + "provide reasonable initial parameters. Run 'srmise " + "pdf_file.gr [baseline_option] --no-extract --plot' " + "for different values of the parameters for rapid " + "visual estimation.", + ) + group.add_option( + "--baseline", + dest="baseline", + metavar="BL", + help="Estimate baseline from baseline function BL " + "defined in diffpy.srmise.baselines, e.g. " + "'Polynomial(degree=1)'. All parameters are free. " + "(Many POSIX shells attempt to interpret the " + "parentheses, and on these shells the option should " + "be surrounded by quotation marks.)", + ) + group.add_option( + "--bcrystal", + dest="bcrystal", + type="string", + metavar="rho0[c]", + help="Use linear baseline defined by crystal number " + "density rho0. Append 'c' to make parameter " + "constant. 
Equivalent to " + "'--bpoly1 -4*pi*rho0[c] 0c'.", + ) + group.add_option( + "--bsrmise", + dest="bsrmise", + type="string", + metavar="file", + help="Use baseline from specified .srmise file.", + ) + group.add_option( + "--bpoly0", + dest="bpoly0", + type="string", + metavar="a0[c]", + help="Use constant baseline given by y=a0. " "Append 'c' to make parameter constant.", + ) + group.add_option( + "--bpoly1", + dest="bpoly1", + type="string", + nargs=2, + metavar="a1[c] a0[c]", + help="Use baseline given by y=a1*x + a0. Append 'c' to " "make parameter constant.", + ) + group.add_option( + "--bpoly2", + dest="bpoly2", + type="string", + nargs=3, + metavar="a2[c] a1[c] a0[c]", + help="Use baseline given by y=a2*x^2+a1*x + a0. Append " "'c' to make parameter constant.", + ) + group.add_option( + "--bseq", + dest="bseq", + type="string", + metavar="FILE", + help="Use baseline interpolated from x,y values in FILE. " "This baseline has no free parameters.", + ) + group.add_option( + "--bspherical", + dest="bspherical", + type="string", + nargs=2, + metavar="s[c] r[c]", + help="Use spherical nanoparticle baseline with scale s " + "and radius r. Append 'c' to make parameter " + "constant.", + ) + parser.add_option_group(group) + + group = OptionGroup( + parser, + "Uncertainty Options", + "Ideally a PDF reports the accurate experimentally " + "determined uncertainty. In practice, many PDFs " + "report none, while for others the reported values " + "are not necessarily reliable. (If in doubt, ask your " + "friendly neighborhood diffraction expert!) Even when " + "uncertainties are accurate, it can be " + "pragmatically useful to see how the results of " + "peak extraction change when assuming a different " + "value. Nevertheless, the primary determinant of " + "model complexity in SrMise is the uncertainty, so an " + "ad hoc uncertainty yields ad hoc model complexity. " + "See the SrMise documentation for further discussion, " + "including methods to mitigate this issue with " + "multimodel selection.", + ) + group.add_option( + "--dg-mode", + dest="dg_mode", + type="choice", + choices=["absolute", "data", "max-fraction", "ptp-fraction", "dG-fraction"], + help="Define how values passed to '--dg' are treated. " + "Possible values are: \n" + "'absolute' - The actual uncertainty in the PDF.\n" + "'max-fraction' - Fraction of max value in PDF.\n" + "'ptp-fraction' - Fraction of max minus min value " + "in the PDF.\n" + "'dG-fraction' - Fraction of dG reported by PDF.\n" + "If '--dg' is specified but mode is not, then mode " + "is absolute. Otherwise, 'dG-fraction' is default " + "if the PDF reports uncertainties, and 'max-fraction' " + "is default if it does not.", + ) + group.add_option( + "--dg", + dest="dg", + type="float", + help="Perform extraction assuming uncertainty dg. " + "Defaults depend on --dg-mode as follows:\n" + "'absolute'=%s\n" + "'max-fraction'=%s\n" + "'ptp-fraction'=%s\n" + "'dG-fraction'=%s" + % ( + dg_defaults["absolute"], + dg_defaults["max-fraction"], + dg_defaults["ptp-fraction"], + dg_defaults["dG-fraction"], + ), + ) + # group.add_option("--multimodel", nargs=3, dest="multimodel", type="float", + # metavar="dg_min dg_max n", + # help="Generate n models from dg_min to dg_max (given by " + # "--dg-mode) and perform multimodel analysis. " + # "This overrides any value given for --dg") + parser.add_option_group(group) + + group = OptionGroup(parser, "Saving and Plotting Options", "") + group.add_option( + "--pwa", + dest="pwafile", + metavar="FILE", + help="Save summary of result to FILE (.pwa format).", + ) + group.add_option( + "--save", + dest="savefile", + metavar="FILE", + help="Save result of extraction to FILE (.srmise " "format).", + ) + group.add_option("--plot", "-p", action="store_true", dest="plot", help="Plot extracted peaks.") + group.add_option( + "--liveplot", + "-l", + action="store_true", + dest="liveplot", + help="(Experimental) Plot extracted peaks when fitting.", + ) + group.add_option( + "--wait", + "-w", + action="store_true", + dest="wait", + help="(Experimental) When using liveplot wait for user " "after plotting.", + ) + parser.add_option_group(group) + + group = OptionGroup(parser, "Verbosity Options", "Control detail printed to console.") + group.add_option( + "--informative", + "-i", + action="store_const", + const="info", + dest="verbosity", + help="Summary of progress.", + ) + group.add_option( + "--quiet", + "-q", + action="store_const", + const="warning", + dest="verbosity", + help="[Default] Show minimal summary.", + ) + group.add_option( + "--silent", + "-s", + action="store_const", + const="critical", + dest="verbosity", + help="No non-critical output.", + ) + group.add_option( + "--verbose", + "-v", + action="store_const", + const="debug", + dest="verbosity", + help="Show verbose output.", + ) + parser.add_option_group(group) + + group = OptionGroup(parser, "Deprecated Options", "Not for general use.") + group.add_option( + "--scale", + action="store_true", + dest="scale", + help="(Deprecated) Scale supersampled uncertainties by " + "sqrt(oversampling) in intermediate steps when " + "Nyquist sampling.", + ) + group.add_option( + "--no-scale", + action="store_false", + dest="scale", + help="(Deprecated) Never rescale uncertainties.", + ) + parser.add_option_group(group) + + (options, args) = parser.parse_args() + + if len(args) != 1: + parser.error("Exactly one argument required. \n" + usage) + + from diffpy.srmise import srmiselog + + srmiselog.setlevel(options.verbosity) + + from diffpy.srmise.pdfpeakextraction import PDFPeakExtraction + from diffpy.srmise.srmiseerrors import SrMiseDataFormatError, SrMiseFileError + + # The --pf, --me, and baseline options are all optional, so evaluate each + # one only when it was actually supplied on the command line. + if options.peakfunction is not None: + from diffpy.srmise import peaks + + try: + options.peakfunction = eval("peaks." + options.peakfunction) + except Exception as err: + print(err) + print("Could not create peak function '%s'. Exiting." % options.peakfunction) + return + + if options.modelevaluator is not None: + from diffpy.srmise import modelevaluators + + try: + options.modelevaluator = eval("modelevaluators." + options.modelevaluator) + except Exception as err: + print(err) + print("Could not find ModelEvaluator '%s'. Exiting." % options.modelevaluator) + return + + if options.bcrystal is not None: + from diffpy.srmise.baselines.polynomial import Polynomial + + bl = Polynomial(degree=1) + options.baseline = parsepars(bl, [options.bcrystal, "0c"]) + options.baseline.pars[0] = -4 * np.pi * options.baseline.pars[0] + elif options.bsrmise is not None: + # use baseline from existing file + blext = PDFPeakExtraction() + blext.read(options.bsrmise) + options.baseline = blext.extracted.baseline + elif options.bpoly0 is not None: + from diffpy.srmise.baselines.polynomial import Polynomial + + bl = Polynomial(degree=0) + options.baseline = parsepars(bl, [options.bpoly0]) + elif options.bpoly1 is not None: + from diffpy.srmise.baselines.polynomial import Polynomial + + bl = Polynomial(degree=1) + options.baseline = parsepars(bl, options.bpoly1) + elif options.bpoly2 is not None: + from diffpy.srmise.baselines.polynomial import Polynomial + + bl = Polynomial(degree=2) + options.baseline = parsepars(bl, options.bpoly2) + elif options.bseq is not None: + from diffpy.srmise.baselines.fromsequence import FromSequence + + bl = FromSequence(options.bseq) + options.baseline = bl.actualize([], "internal") + elif options.bspherical is not None: + from diffpy.srmise.baselines.nanospherical import NanoSpherical + + bl = NanoSpherical() + options.baseline = parsepars(bl, options.bspherical) + elif options.baseline is not None: + # A baseline given as a string via --baseline, e.g. 'Polynomial(degree=1)'. + from diffpy.srmise import baselines + + try: + options.baseline = eval("baselines." + options.baseline) + except Exception as err: + print(err) + print("Could not create baseline '%s'. Exiting." % options.baseline) + return + + filename = args[0] + + if filename is not None: + ext = PDFPeakExtraction() + try: + ext.read(filename) + except (SrMiseDataFormatError, SrMiseFileError, Exception): + ext.loadpdf(filename) + + pdict = {} + if options.peakfunction is not None: + pdict["pf"] = [options.peakfunction] + if options.baseline is not None: + pdict["baseline"] = options.baseline + if options.cres is not None: + pdict["cres"] = options.cres + if options.dg_mode is None: + if options.dg is not None: + options.dg_mode = "absolute" + elif ext.dy is None: + options.dg_mode = "max-fraction" + else: + options.dg_mode = "dG-fraction" + if options.dg is None: + options.dg = dg_defaults[options.dg_mode] + if options.dg_mode == "absolute": + pdict["effective_dy"] = options.dg * np.ones(len(ext.x)) + elif options.dg_mode == "max-fraction": + pdict["effective_dy"] = options.dg * ext.y.max() * np.ones(len(ext.x)) + elif options.dg_mode == "ptp-fraction": + pdict["effective_dy"] = options.dg * ext.y.ptp() * np.ones(len(ext.y)) + elif options.dg_mode == "dG-fraction": + pdict["effective_dy"] = options.dg * ext.dy + if options.rng is not None: + pdict["rng"] = list(options.rng) + if options.qmax is not None: + pdict["qmax"] = options.qmax if options.qmax == "automatic" else float(options.qmax) + if options.nyquist is not None: + pdict["nyquist"] = options.nyquist + if options.supersample is not None: + pdict["supersample"] = options.supersample + if options.scale is not None: + pdict["scale"] = options.scale + if options.modelevaluator is not None: + pdict["error_method"] = options.modelevaluator + + if options.liveplot: + from diffpy.srmise import srmiselog + + srmiselog.liveplotting(True, options.wait) + + ext.setvars(**pdict) + cov = None + if options.performextraction: + cov = ext.extract() + + if options.savefile is not None: + try: + ext.write(options.savefile) + except SrMiseFileError as err: + print(err) + print("Could not save result to '%s'."
% options.savefile) + + if options.pwafile is not None: + try: + ext.writepwa(options.pwafile) + except SrMiseFileError as err: + print(err) + print("Could not save pwa summary to '%s'." % options.pwafile) + + print(ext) + if cov is not None: + print(cov) + + if options.plot: + from diffpy.srmise.applications.plot import makeplot + + makeplot(ext) + plt.show() + elif options.liveplot: + plt.show() + + +def parsepars(mp, parseq): + """Return actualized model from sequence of strings. + + Each item in parseq must be interpretable as a float, or as + a float with the character 'c' appended. If 'c' is appended, + that parameter will be fixed. + + Parameters: + mp - A ModelPart instance + parseq - A sequence of strings + """ + pars = [] + free = [] + for p in parseq: + if p[-1] == "c": + pars.append(float(p[0:-1])) + free.append(False) + else: + pars.append(float(p)) + free.append(True) + return mp.actualize(pars, "internal", free=free) + + +# Class to preserve newlines in optparse +# Borrowed, with minor changes, from +# http://groups.google.com/group/comp.lang.python/browse_frm/thread/6df6e6b541a15bc2/09f28e26af0699b1 + + +class IndentedHelpFormatterWithNL(IndentedHelpFormatter): + def _format_text(self, text): + if not text: + return "" + text_width = self.width - self.current_indent + indent = " " * self.current_indent + # the above is still the same + bits = text.split("\n") + formatted_bits = [ + textwrap.fill(bit, text_width, initial_indent=indent, subsequent_indent=indent) for bit in bits + ] + result = "\n".join(formatted_bits) + "\n" + return result + + def format_option(self, option): + # The help for each option consists of two parts: + # * the opt strings and metavars + # eg. ("-x", or "-fFILENAME, --file=FILENAME") + # * the user-supplied help string + # eg. ("turn on expert mode", "read data from FILENAME") + # + # If possible, we write both of these on the same line: + # -x turn on expert mode + # + # But if the opt string list is too long, we put the help + # string on a second line, indented to the same column it would + # start in if it fit on the first line.
+ # -fFILENAME, --file=FILENAME + # read data from FILENAME + result = [] + opts = self.option_strings[option] + opt_width = self.help_position - self.current_indent - 2 + if len(opts) > opt_width: + opts = "%*s%s\n" % (self.current_indent, "", opts) + indent_first = self.help_position + else: # start help on same line as opts + opts = "%*s%-*s " % (self.current_indent, "", opt_width, opts) + indent_first = 0 + result.append(opts) + if option.help: + help_text = self.expand_default(option) + # Everything is the same up through here + help_lines = [] + for para in help_text.split("\n"): + help_lines.extend(textwrap.wrap(para, self.help_width)) + # Everything is the same after here + result.append("%*s%s\n" % (indent_first, "", help_lines[0])) + result.extend(["%*s%s\n" % (self.help_position, "", line) for line in help_lines[1:]]) + elif opts[-1] != "\n": + result.append("\n") + return "".join(result) + + +# End class + +if __name__ == "__main__": + main() diff --git a/diffpy/srmise/applications/plot.py b/src/diffpy/srmise/applications/plot.py similarity index 54% rename from diffpy/srmise/applications/plot.py rename to src/diffpy/srmise/applications/plot.py index b696aa8..2663158 100755 --- a/diffpy/srmise/applications/plot.py +++ b/src/diffpy/srmise/applications/plot.py @@ -2,7 +2,8 @@ ############################################################################## # # SrMise by Luke Granlund -# (c) 2014 trustees of the Michigan State University. +# (c) 2014 trustees of the Michigan State University +# (c) 2024 trustees of Columbia University in the City of New York # All rights reserved. # # File coded by: Luke Granlund @@ -18,58 +19,72 @@ import matplotlib.pyplot as plt import mpl_toolkits.axisartist as AA import numpy as np -from mpl_toolkits.axes_grid1 import make_axes_locatable +from matplotlib.ticker import MultipleLocator from mpl_toolkits.axes_grid1.inset_locator import inset_axes -from diffpy.srmise import PDFPeakExtraction, PeakStability -from diffpy.srmise.pdfpeakextraction import resample +from diffpy.srmise.pdfpeakextraction import PDFPeakExtraction, resample +from diffpy.srmise.peakstability import PeakStability # For a given figure, returns a label of interest labeldict = {} -default_gobs_style = {'color' : 'b', 'linestyle' : '', - 'markeredgecolor' : 'b', 'marker' : 'o', - 'markerfacecolor' : 'none', 'markersize' : 4} - -default_gfit_style = {'color' : 'g'} -default_gind_style = {'facecolor' : 'green', 'alpha' : 0.2} +default_gobs_style = { + "color": "b", + "linestyle": "", + "markeredgecolor": "b", + "marker": "o", + "markerfacecolor": "none", + "markersize": 4, +} + +default_gfit_style = {"color": "g"} +default_gind_style = {"facecolor": "green", "alpha": 0.2} default_gres_style = {} default_ep_style = {} default_ip_style = {} -default_dg_style = {'linestyle' : 'none', 'color' : 'black', - 'marker' : 'o', 'markerfacecolor' : 'black', - 'markeredgecolor' : 'black', - 'markersize' : 1, 'antialiased': False} +default_dg_style = { + "linestyle": "none", + "color": "black", + "marker": "o", + "markerfacecolor": "black", + "markeredgecolor": "black", + "markersize": 1, + "antialiased": False, +} def setfigformat(figsize): from matplotlib import rc - rc('legend', numpoints=2) - rc('figure', figsize=figsize) - rc('axes', titlesize=12, labelsize=11) - rc('xtick', labelsize=10) - rc('ytick', labelsize=10) - rc('lines', linewidth=0.75, markeredgewidth=0.5) + + rc("legend", numpoints=2) + rc("figure", figsize=figsize) + rc("axes", titlesize=12, labelsize=11) + rc("xtick", labelsize=10)
+ rc("ytick", labelsize=10) + rc("lines", linewidth=0.75, markeredgewidth=0.5) return + def gr_formataxis(ax=None): - if ax is None: ax = plt.gca() + if ax is None: + ax = plt.gca() ax.xaxis.set_minor_locator(MultipleLocator(1)) - ax.yaxis.set_label_position('left') + ax.yaxis.set_label_position("left") ax.yaxis.tick_left() - ax.yaxis.set_ticks_position('both') + ax.yaxis.set_ticks_position("both") return + def comparepositions(ppe, ip=None, **kwds): ax = kwds.get("ax", plt.gca()) - base = kwds.get("base", 0.) - yideal = kwds.get("yideal", -1.) - yext = kwds.get("yext", 1.) + base = kwds.get("base", 0.0) + yideal = kwds.get("yideal", -1.0) + yext = kwds.get("yext", 1.0) ep_style = kwds.get("ep_style", default_ep_style) ip_style = kwds.get("ip_style", default_ip_style) - yideal_label = kwds.get("yideal_label", r'ideal') - yext_label = kwds.get("yext_label", r'found') + yideal_label = kwds.get("yideal_label", r"ideal") + yext_label = kwds.get("yext_label", r"found") pmin = kwds.get("pmin", -np.inf) pmax = kwds.get("pmax", np.inf) @@ -77,39 +92,38 @@ def comparepositions(ppe, ip=None, **kwds): ep = [p for p in ep if p >= pmin and p <= pmax] if ip is not None: - xi = np.NaN + np.zeros(3*len(ip)) + xi = np.nan + np.zeros(3 * len(ip)) xi[0::3] = ip xi[1::3] = ip yi = np.zeros_like(xi) + base yi[1::3] += yideal - plt.plot(xi, yi, 'b', lw=1.5, **ip_style) + plt.plot(xi, yi, "b", lw=1.5, **ip_style) - xe = np.NaN + np.zeros(3*len(ep)) + xe = np.nan + np.zeros(3 * len(ep)) xe[0::3] = ep xe[1::3] = ep ye = np.zeros_like(xe) + base ye[1::3] += yext - plt.plot(xe, ye, 'g', lw=1.5, **ep_style) + plt.plot(xe, ye, "g", lw=1.5, **ep_style) if ip is not None: - yb = (base, base) - plt.axhline(base, linestyle=":", color="k" ) - ax.yaxis.set_ticks([base+.5*yideal, base+.5*yext]) + plt.axhline(base, linestyle=":", color="k") + ax.yaxis.set_ticks([base + 0.5 * yideal, base + 0.5 * yext]) ax.yaxis.set_ticklabels([yideal_label, yext_label]) else: - ax.yaxis.set_ticks([base+.5*yext]) + ax.yaxis.set_ticks([base + 0.5 * yext]) ax.yaxis.set_ticklabels([yext_label]) # Set ylim explicitly, for case where yext is empty. if ip is not None: - plt.ylim(base+yideal, base+yext) + plt.ylim(base + yideal, base + yext) else: - plt.ylim(base, base+yext) + plt.ylim(base, base + yext) for tick in ax.yaxis.get_major_ticks(): tick.tick1line.set_markersize(0) tick.tick2line.set_markersize(0) - tick.label1.set_verticalalignment('center') + tick.label1.set_verticalalignment("center") tick.label1.set_fontsize(8) ticks = ax.yaxis.get_major_ticks() ticks[-1].label1.set_color("green") @@ -117,37 +131,45 @@ def comparepositions(ppe, ip=None, **kwds): ticks[0].label1.set_color("blue") return + def dgseries(stability, **kwds): - ax = kwds.get("ax", plt.gca()) + kwds.get("ax", plt.gca()) dg_style = kwds.get("dg_style", default_dg_style) - scale = kwds.get("scale", 1.) + scale = kwds.get("scale", 1.0) - dgmin = kwds.get("dgmin", stability.results[0][0])*scale - dgmax = kwds.get("dgmax", stability.results[-1][0])*scale + dgmin = kwds.get("dgmin", stability.results[0][0]) * scale + dgmax = kwds.get("dgmax", stability.results[-1][0]) * scale - pmin = kwds.get("pmin", 0.) 
+ pmin = kwds.get("pmin", 0.0) pmax = kwds.get("pmax", np.inf) x = [] y = [] for dg, peaks, bl, dr in stability.results: - if dg*scale < dgmin or dg*scale > dgmax: + if dg * scale < dgmin or dg * scale > dgmax: continue peakpos = [p["position"] for p in peaks] peakpos = [p for p in peakpos if p >= pmin and p <= pmax] x.extend(peakpos) - y.extend(np.zeros_like(peakpos) + dg*scale) + y.extend(np.zeros_like(peakpos) + dg * scale) plt.plot(x, y, **dg_style) + def labelallsubplots(): rv = [] - for i, c in enumerate('abcd'): + for i, c in enumerate("abcd"): plt.subplot(221 + i) s = "(%s)" % c - ht = plt.text(0.04, 0.95, s, - horizontalalignment='left', verticalalignment='top', - transform=gca().transAxes, weight='bold') + ht = plt.text( + 0.04, + 0.95, + s, + horizontalalignment="left", + verticalalignment="top", + transform=plt.gca().transAxes, + weight="bold", + ) rv.append(ht) return rv @@ -163,15 +185,15 @@ def makeplot(ppe_or_stability, ip=None, **kwds): if ppe.extracted is None: # Makeplot requires a ModelCluster, so whip one up. - from diffpy.srmise import ModelCluster - ppe.defaultvars() # Make sure everything has some setting. This - # shouldn't have harmful side effects. + from diffpy.srmise.modelcluster import ModelCluster + + ppe.defaultvars() # Make sure everything has some setting. This + # shouldn't have harmful side effects. rangeslice = ppe.getrangeslice() x = ppe.x[rangeslice] y = ppe.y[rangeslice] dy = ppe.effective_dy[rangeslice] - mcluster = ModelCluster(ppe.initial_peaks, ppe.baseline, x, y, \ - dy, None, ppe.error_method, ppe.pf) + mcluster = ModelCluster(ppe.initial_peaks, ppe.baseline, x, y, dy, None, ppe.error_method, ppe.pf) ext = mcluster else: ext = ppe.extracted @@ -189,17 +211,17 @@ def makeplot(ppe_or_stability, ip=None, **kwds): # Define heights and interstitial offsets # All values in percent of main axis. - top_offset = kwds.get("top_offset", 0.) - dg_height = kwds.get("dg_height", 15. if stability is not None else 0.) - cmp_height = kwds.get("cmp_height", 15. if ip is not None else 7.5) - datatop_offset = kwds.get("datatop_offset", 3.) + top_offset = kwds.get("top_offset", 0.0) + dg_height = kwds.get("dg_height", 15.0 if stability is not None else 0.0) + cmp_height = kwds.get("cmp_height", 15.0 if ip is not None else 7.5) + datatop_offset = kwds.get("datatop_offset", 3.0) # <- Data appears here -> - databottom_offset = kwds.get("databottom_offset", 3.) + databottom_offset = kwds.get("databottom_offset", 3.0) # <- Residual appears here -> - bottom_offset = kwds.get("bottom_offset", 3.) 
+ bottom_offset = kwds.get("bottom_offset", 3.0) # Style options - dg_style = kwds.get("dg_style", default_dg_style) + kwds.get("dg_style", default_dg_style) gobs_style = kwds.get("gobs_style", default_gobs_style) gfit_style = kwds.get("gfit_style", default_gfit_style) gind_style = kwds.get("gind_style", default_gind_style) @@ -208,83 +230,81 @@ def makeplot(ppe_or_stability, ip=None, **kwds): ip_style = kwds.get("ip_style", default_ip_style) # Label options - userxlabel = kwds.get("xlabel", r'r ($\mathrm{\AA}$)') - userylabel = kwds.get("ylabel", r'G ($\mathrm{\AA^{-2}}$)') - datalabelx = kwds.get("datalabelx", .04) - yideal_label = kwds.get("yideal_label", r'ideal') - yext_label = kwds.get("yext_label", r'found') + userxlabel = kwds.get("xlabel", r"r ($\mathrm{\AA}$)") + userylabel = kwds.get("ylabel", r"G ($\mathrm{\AA^{-2}}$)") + datalabelx = kwds.get("datalabelx", 0.04) + yideal_label = kwds.get("yideal_label", r"ideal") + yext_label = kwds.get("yext_label", r"found") # Other options datalabel = kwds.get("datalabel", None) - dgformatstr = kwds.get("dgformatstr", r'$\delta$g=%f') - dgformatpost = kwds.get("dgformatpost", None) #->userfunction(string) + dgformatstr = kwds.get("dgformatstr", r"$\delta$g=%f") + kwds.get("dgformatpost", None) # ->userfunction(string) show_fit = kwds.get("show_fit", True) show_individual = kwds.get("show_individual", True) fill_individual = kwds.get("fill_individual", True) show_observed = kwds.get("show_observed", True) show_residual = kwds.get("show_residual", True) - mask_residual = kwds.get("mask_residual", False) #-> number + mask_residual = kwds.get("mask_residual", False) # -> number show_annotation = kwds.get("show_annotation", True) - scale = kwds.get("scale", 1.) # Apply a global scaling factor to the data - - + scale = kwds.get("scale", 1.0) # Apply a global scaling factor to the data # Define the various data which will be plotted r = ext.r_cluster - dr = (r[-1]-r[0])/len(r) - rexpand = np.concatenate((np.arange(r[0]-dr, xlo, -dr)[::-1], r, np.arange(r[-1]+dr, xhi+dr, dr))) - rfine = np.arange(r[0], r[-1], .1*dr) - gr_obs = np.array(resample(ppe.x, ppe.y, rexpand))*scale - #gr_fit = resample(r, ext.value(), rfine) - gr_fit = np.array(ext.value(rfine))*scale - gr_fit_baseline = np.array(ext.valuebl(rfine))*scale - gr_fit_ind = [gr_fit_baseline + np.array(p.value(rfine))*scale for p in ext.model] - gr_res = np.array(ext.residual())*scale + dr = (r[-1] - r[0]) / len(r) + rexpand = np.concatenate((np.arange(r[0] - dr, xlo, -dr)[::-1], r, np.arange(r[-1] + dr, xhi + dr, dr))) + rfine = np.arange(r[0], r[-1], 0.1 * dr) + gr_obs = np.array(resample(ppe.x, ppe.y, rexpand)) * scale + # gr_fit = resample(r, ext.value(), rfine) + gr_fit = np.array(ext.value(rfine)) * scale + gr_fit_baseline = np.array(ext.valuebl(rfine)) * scale + gr_fit_ind = [gr_fit_baseline + np.array(p.value(rfine)) * scale for p in ext.model] + gr_res = np.array(ext.residual()) * scale if mask_residual: gr_res = np.ma.masked_outside(gr_res, -mask_residual, mask_residual) all_gr = [] - if show_fit: all_gr.append(gr_fit) - #if show_individual: all_gr.extend([gr_fit_baseline, gr_fit_ind]) + if show_fit: + all_gr.append(gr_fit) + # if show_individual: all_gr.extend([gr_fit_baseline, gr_fit_ind]) if show_individual: all_gr.append(gr_fit_baseline) if len(gr_fit_ind) > 0: all_gr.extend(gr_fit_ind) - if show_observed: all_gr.append(gr_obs) + if show_observed: + all_gr.append(gr_obs) # gr_fit_ind is a list of lists, so use np.min/max # The funky bit with scale makes sure that a user-specified 
value # has scale applied to it, without messing up the default values, # which are calculated from already scaled quantities. - min_gr = kwds.get("min_gr", np.min([np.min(gr) for gr in all_gr])/scale)*scale - max_gr = kwds.get("max_gr", np.max([np.max(gr) for gr in all_gr])/scale)*scale - + min_gr = kwds.get("min_gr", np.min([np.min(gr) for gr in all_gr]) / scale) * scale + max_gr = kwds.get("max_gr", np.max([np.max(gr) for gr in all_gr]) / scale) * scale if show_residual: min_res = np.min(gr_res) max_res = np.max(gr_res) else: - min_res = 0. - max_res = 0. + min_res = 0.0 + max_res = 0.0 # Derive various y limits based on all the offsets - rel_height = 100. - top_offset - dg_height - cmp_height - datatop_offset - databottom_offset - bottom_offset - abs_height = 100*((max_gr - min_gr) + (max_res - min_res))/rel_height + rel_height = 100.0 - top_offset - dg_height - cmp_height - datatop_offset - databottom_offset - bottom_offset + abs_height = 100 * ((max_gr - min_gr) + (max_res - min_res)) / rel_height - yhi = max_gr + (top_offset + dg_height + cmp_height + datatop_offset)*abs_height/100 + yhi = max_gr + (top_offset + dg_height + cmp_height + datatop_offset) * abs_height / 100 ylo = yhi - abs_height yhi = kwds.get("yhi", yhi) ylo = kwds.get("ylo", ylo) - datatop = yhi - (yhi-ylo)*.01*(top_offset + dg_height + cmp_height) - datalabeltop = 1 - .01*(top_offset + dg_height + cmp_height + datatop_offset) - resbase = ylo + bottom_offset*abs_height/100 - min_res + datatop = yhi - (yhi - ylo) * 0.01 * (top_offset + dg_height + cmp_height) + datalabeltop = 1 - 0.01 * (top_offset + dg_height + cmp_height + datatop_offset) + resbase = ylo + bottom_offset * abs_height / 100 - min_res resbase = kwds.get("resbase", resbase) - fig = kwds.get("figure", plt.gcf()) fig.clf() ax_data = AA.Subplot(fig, 111) @@ -301,8 +321,8 @@ def makeplot(ppe_or_stability, ip=None, **kwds): for peak in gr_fit_ind: plt.fill_between(rfine, gr_fit_baseline, peak, **gind_style) if show_residual: - plt.plot(r, gr_res + resbase, 'r-', **gres_style) - plt.plot((xlo, xhi), 2*[resbase], 'k:') + plt.plot(r, gr_res + resbase, "r-", **gres_style) + plt.plot((xlo, xhi), 2 * [resbase], "k:") # Format ax_data plt.xlim(xlo, xhi) @@ -310,27 +330,33 @@ def makeplot(ppe_or_stability, ip=None, **kwds): plt.xlabel(userxlabel) plt.ylabel(userylabel) ax_data.xaxis.set_minor_locator(plt.MultipleLocator(1)) - #ax_data.yaxis.set_minor_locator(plt.MultipleLocator(np.max([1,int((yhi-ylo)/20)]))) - ax_data.yaxis.set_label_position('left') + # ax_data.yaxis.set_minor_locator(plt.MultipleLocator(np.max([1,int((yhi-ylo)/20)]))) + ax_data.yaxis.set_label_position("left") ax_data.yaxis.tick_left() - ax_data.yaxis.set_ticks_position('both') + ax_data.yaxis.set_ticks_position("both") # Remove labels above where insets begin - #ax_data.yaxis.set_ticklabels([str(int(loc)) for loc in ax_data.yaxis.get_majorticklocs() if loc < datatop]) + # ax_data.yaxis.set_ticklabels([str(int(loc)) for loc in ax_data.yaxis.get_majorticklocs() if loc < datatop]) ax_data.yaxis.set_ticks([loc for loc in ax_data.yaxis.get_majorticklocs() if (loc < datatop and loc >= ylo)]) - # Dataset label if datalabel is not None: - dl = plt.text(datalabelx, datalabeltop, datalabel, ha='left', va='top', - transform=ax_data.transAxes, weight='bold') + dl = plt.text( + datalabelx, + datalabeltop, + datalabel, + ha="left", + va="top", + transform=ax_data.transAxes, + weight="bold", + ) else: dl = None figdict["datalabel"] = dl # Create new x axis at bottom edge of compare inset 
ax_data.axis["top"].set_visible(False) - ax_data.axis["newtop"] = ax_data.new_floating_axis(0, datatop, axis_direction="bottom") # "top" bugged? + ax_data.axis["newtop"] = ax_data.new_floating_axis(0, datatop, axis_direction="bottom") # "top" bugged? ax_data.axis["newtop"].toggle(all=False, ticks=True) ax_data.axis["newtop"].major_ticks.set_tick_out(True) ax_data.axis["newtop"].minor_ticks.set_tick_out(True) @@ -339,37 +365,55 @@ def makeplot(ppe_or_stability, ip=None, **kwds): # The original label is invisible, but we use its (dynamic) x position # to update the new label, which we define have the correct y position. # A bit of a tradeoff for the nice insets and ability to define new axes. - newylabel = plt.text(-.1, .5*(datatop-ylo)/(yhi-ylo), userylabel, - ha='center', va='center', rotation='vertical', transform=ax_data.transAxes) - labeldict[fig] = newylabel # so we can find the correct text object - fig.canvas.mpl_connect('draw_event', on_draw) # original label invisibility and updating + newylabel = plt.text( + -0.1, + 0.5 * (datatop - ylo) / (yhi - ylo), + userylabel, + ha="center", + va="center", + rotation="vertical", + transform=ax_data.transAxes, + ) + labeldict[fig] = newylabel # so we can find the correct text object + fig.canvas.mpl_connect("draw_event", on_draw) # original label invisibility and updating # Compare extracted (and ideal, if provided) peak positions clearly. if cmp_height > 0: - ax_cmp = inset_axes(ax_data, - width="100%", - height="%s%%" %cmp_height, - loc=2, - bbox_to_anchor=(0., -.01*(top_offset+dg_height), 1, 1), - bbox_transform=ax_data.transAxes, - borderpad=0) + ax_cmp = inset_axes( + ax_data, + width="100%", + height="%s%%" % cmp_height, + loc=2, + bbox_to_anchor=(0.0, -0.01 * (top_offset + dg_height), 1, 1), + bbox_transform=ax_data.transAxes, + borderpad=0, + ) figdict["cmp"] = ax_cmp plt.axes(ax_cmp) - comparepositions(ext, ip, ep_style=ep_style, ip_style=ip_style, yideal_label=yideal_label, yext_label=yext_label) + comparepositions( + ext, + ip, + ep_style=ep_style, + ip_style=ip_style, + yideal_label=yideal_label, + yext_label=yext_label, + ) plt.xlim(xlo, xhi) ax_cmp.set_xticks([]) # Show how extracted peak positions change as dg is changed if dg_height > 0: - ax_dg = inset_axes(ax_data, - width="100%", - height="%s%%" %dg_height, - loc=2, - bbox_to_anchor=(0, -.01*top_offset, 1, 1), - bbox_transform=ax_data.transAxes, - borderpad=0) + ax_dg = inset_axes( + ax_data, + width="100%", + height="%s%%" % dg_height, + loc=2, + bbox_to_anchor=(0, -0.01 * top_offset, 1, 1), + bbox_transform=ax_data.transAxes, + borderpad=0, + ) figdict["dg"] = ax_dg plt.axes(ax_dg) @@ -384,31 +428,42 @@ def makeplot(ppe_or_stability, ip=None, **kwds): plt.xlim(xlo, xhi) ax_dg.xaxis.set_major_locator(plt.NullLocator()) ax_dg.yaxis.set_major_locator(plt.MaxNLocator(3)) - plt.ylabel(r'$\delta$g') + plt.ylabel(r"$\delta$g") # Annotate the actual dg shown if show_annotation: - dg = np.mean(ext.error_cluster)*scale - dgstr = dgformatstr %dg - if "dgformatpost" in kwds: #post-processing on dg annotation + dg = np.mean(ext.error_cluster) * scale + dgstr = dgformatstr % dg + if "dgformatpost" in kwds: # post-processing on dg annotation dgstr = kwds["dgformatpost"](dgstr) if len(ext.model) > 0: - xpos = np.mean([xlo, ext.model[0]["position"]]) # OK for now. + xpos = np.mean([xlo, ext.model[0]["position"]]) # OK for now. 
else: - xpos = xlo + .1*(xhi-xlo) + xpos = xlo + 0.1 * (xhi - xlo) if dg_height > 0 and cmp_height > 0: # Arrow, text in compare distances line ylo2, yhi2 = ax_dg.get_ylim() if ip is not None: - ypos = ylo2 - .25*cmp_height/dg_height*(yhi2-ylo2) + ypos = ylo2 - 0.25 * cmp_height / dg_height * (yhi2 - ylo2) else: - ypos = ylo2 - .5*cmp_height/dg_height*(yhi2-ylo2) - plt.annotate(dgstr, xy=(xlo, dg), xycoords='data', va='center', ha='center', - xytext=(xpos,ypos), textcoords='data', size=8, color='green', - arrowprops=dict(arrowstyle="->", - connectionstyle="angle,angleA=90,angleB=0,rad=10", - color="green")) + ypos = ylo2 - 0.5 * cmp_height / dg_height * (yhi2 - ylo2) + plt.annotate( + dgstr, + xy=(xlo, dg), + xycoords="data", + va="center", + ha="center", + xytext=(xpos, ypos), + textcoords="data", + size=8, + color="green", + arrowprops=dict( + arrowstyle="->", + connectionstyle="angle,angleA=90,angleB=0,rad=10", + color="green", + ), + ) elif dg_height > 0: # Arrow, and text located somewhere in main plot region @@ -419,8 +474,8 @@ def makeplot(ppe_or_stability, ip=None, **kwds): # Must change axes plt.axes(ax_cmp) ylo2, yhi2 = ax_cmp.get_ylim() - ypos = yhi2/2. - plt.text(xpos, ypos, dgstr, va='center', ha='center', size=8, color='green') + ypos = yhi2 / 2.0 + plt.text(xpos, ypos, dgstr, va="center", ha="center", size=8, color="green") else: # Text only in main plot region # Must change axes @@ -439,6 +494,8 @@ def makeplot(ppe_or_stability, ip=None, **kwds): # invisiblelabel must be temporarily made visible to update # its values. _lastxpos = {} + + def on_draw(event): global _lastxpos fig = event.canvas.figure @@ -458,7 +515,7 @@ def on_draw(event): # If it is kept visible the whole time this problem doesn't occur. # This problem doesn't occur onscreen (TkAgg) or printing PDFs, and # didn't occur in matplotlib 1.0.0. - if abs(xpos - _lastxpos.get(fig, 0)) > .001: + if abs(xpos - _lastxpos.get(fig, 0)) > 0.001: _lastxpos[fig] = xpos plt.draw() else: @@ -466,33 +523,34 @@ def on_draw(event): invisiblelabel.set_visible(False) xpos_old = visiblelabel.get_position()[0] - if abs(xpos - xpos_old) > .001: + if abs(xpos - xpos_old) > 0.001: labeldict[fig].set_x(xpos) plt.draw() return + def readcompare(filename): """Returns a list of distances read from filename, otherwise None.""" - from diffpy.srmise.srmiseerrors import SrMiseDataFormatError, SrMiseFileError # TODO: Make this safer try: - datastring = open(filename,'rb').read() - except Exception, err: + datastring = open(filename, "rb").read() + except Exception as err: raise err import re - res = re.search(r'^[^#]', datastring, re.M) + + res = re.search(r"^[^#]", datastring, re.M) if res: - datastring = datastring[res.end():].strip() + datastring = datastring[res.end() :].strip() distances = [] try: for line in datastring.split("\n"): distances.append(float(line)) - except (ValueError, IndexError), err: - print "Could not read distances from '%s'. Ignoring file." %filename + except (ValueError, IndexError): + print("Could not read distances from '%s'. Ignoring file." % filename) if len(distances) == 0: return None @@ -502,31 +560,37 @@ def readcompare(filename): def main(): # configure options parsing - usage = ("%prog srmise_file [options]\n" - "srmise_file can be an extraction file saved by SrMise, " - "or a data file saved by PeakStability.") - descr = ("A very basic tool for somewhat prettier plotting than provided by " - "the basic SrMise classes. 
Can be used to compare peak positions " - "with those from a list.\n" - "NOTE: At this time the utility only works with peaks extracted using diffpy.srmise.PDFPeakExtraction.") + usage = ( + "%prog srmise_file [options]\n" + "srmise_file can be an extraction file saved by SrMise, " + "or a data file saved by PeakStability." + ) + descr = ( + "A very basic tool for somewhat prettier plotting than provided by " + "the basic SrMise classes. Can be used to compare peak positions " + "with those from a list.\n" + "NOTE: At this time the utility only works with peaks extracted using diffpy.srmise.PDFPeakExtraction." + ) parser = optparse.OptionParser(usage=usage, description=descr) - parser.add_option("--compare", type="string", - help="Compare extracted distances to distances listed (1/line) in this file.") - parser.add_option("--model", type="int", - help="Plot given model from set. Ignored if srmise_file is not a PeakStability file.") - parser.add_option("--show", action="store_true", - help="execute pylab.show() blocking call") - parser.add_option("-o", "--output", type="string", - help="save plot to the specified file") - parser.add_option("--format", type="string", default="eps", - help="output format for plot saving") + parser.add_option( + "--compare", + type="string", + help="Compare extracted distances to distances listed (1/line) in this file.", + ) + parser.add_option( + "--model", + type="int", + help="Plot given model from set. Ignored if srmise_file is not a PeakStability file.", + ) + parser.add_option("--show", action="store_true", help="execute pylab.show() blocking call") + parser.add_option("-o", "--output", type="string", help="save plot to the specified file") + parser.add_option("--format", type="string", default="eps", help="output format for plot saving") parser.allow_interspersed_args = True opts, args = parser.parse_args(sys.argv[1:]) - if len(args) != 1: - parser.error("Exactly one argument required. \n"+usage) + parser.error("Exactly one argument required. \n" + usage) filename = args[0] @@ -534,27 +598,27 @@ def main(): toplot = PDFPeakExtraction() try: toplot.read(filename) - except (Exception): + except Exception: toplot = PeakStability() try: toplot.load(filename) except Exception: - print "File '%s' is not a .srmise or PeakStability data file." %filename + print("File '%s' is not a .srmise or PeakStability data file." % filename) return if opts.model is not None: try: toplot.setcurrent(opts.model) - except (Exception): - print "Ignoring model, %s is not a PeakStability file." %filename + except Exception: + print("Ignoring model, %s is not a PeakStability file." 
% filename) distances = None if opts.compare is not None: # use baseline from existing file distances = readcompare(opts.compare) - setfigformat(figsize=(6., 4.0)) - figdict = makeplot(toplot, distances) + setfigformat(figsize=(6.0, 4.0)) + makeplot(toplot, distances) if opts.output: plt.savefig(opts.output, format=opts.format, dpi=600) if opts.show: @@ -564,5 +628,5 @@ def main(): return -if __name__ == '__main__': +if __name__ == "__main__": main() diff --git a/diffpy/srmise/basefunction.py b/src/diffpy/srmise/basefunction.py similarity index 61% rename from diffpy/srmise/basefunction.py rename to src/diffpy/srmise/basefunction.py index 18a55e4..1711138 100644 --- a/diffpy/srmise/basefunction.py +++ b/src/diffpy/srmise/basefunction.py @@ -2,7 +2,8 @@ ############################################################################## # # SrMise by Luke Granlund -# (c) 2014 trustees of the Michigan State University. +# (c) 2014 trustees of the Michigan State University +# (c) 2024 trustees of Columbia University in the City of New York # All rights reserved. # # File coded by: Luke Granlund @@ -18,31 +19,36 @@ import numpy as np -from diffpy.srmise.modelparts import ModelPart, ModelParts -from diffpy.srmise.srmiseerrors import * +from diffpy.srmise.srmiseerrors import SrMiseDataFormatError logger = logging.getLogger("diffpy.srmise") + class BaseFunction(object): """Base class for mathematical functions which model numeric sequences. - Class members + Attributes ------------- - parameterdict: A dictionary mapping string keys to their index in the - sequence of parameters. These keys apply only to the - default "internal" format. - parformats: A sequence of strings defining what formats are recognized - by a function. - default_formats: A dictionary which maps the strings "default_input" and - "default_output" to strings also appearing in parformats. - "default_input"-> format used internally within the class - "default_output"-> Default format to use when converting - parameters for outside use. - metadict: Dictionary mapping string keys to tuple (v, m) where v is an - additional argument required by function, and m is a method - whose string output recreates v when passed to eval(). - base: A basefunction subclass instance which this one decorates with - additional functionality. + parameterdict : dict + The dictionary mapping string keys to their index in the + sequence of parameters. These keys apply only to the + default "internal" format. + parformats : array-like + The sequence of strings defining what formats are recognized + by a function. + default_formats : dict + The dictionary which maps the strings "default_input" and + "default_output" to strings also appearing in parformats. + "default_input"-> format used internally within the class + "default_output"-> Default format to use when converting + parameters for outside use. + metadict : dict + The Dictionary mapping string keys to tuple (v, m) where v is an + additional argument required by function, and m is a method + whose string output recreates v when passed to eval(). + base : BaseFunction subclass + The basefunction subclass instance which this one decorates with + additional functionality. 
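To make the attribute documentation above concrete, this is roughly what the four attributes hold for a small peak function. The values are illustrative; "internal" and "pwa" are format names that appear elsewhere in this package, while "maxwidth" is only an assumed metadata key:

parameterdict = {"position": 0, "width": 1, "area": 2}
parformats = ("internal", "pwa")
default_formats = {"default_input": "internal", "default_output": "pwa"}
# Each metadict value pairs an argument with a function whose string output
# recreates that argument when passed to eval(), e.g. repr(0.7) -> '0.7'.
metadict = {"maxwidth": (0.7, repr)}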
Class methods (implemented by inheriting classes) ------------------------------------------------- @@ -61,23 +67,38 @@ class BaseFunction(object): transform_parameters() """ - def __init__(self, parameterdict, parformats, default_formats, metadict, base=None, Cache=None): + def __init__( + self, + parameterdict, + parformats, + default_formats, + metadict, + base=None, + Cache=None, + ): """Set parameterdict defined by subclass Parameters - parameterdict - A dictionary mapping string keys (e.g. "position") - to their index in a sequence of parameters for this - PeakFunction subclass. Every parameter must appear. - parformats - A sequence of strings containing all allowed input/output - formats defined for the function's parameters. - default_formats - A dictionary mapping the string keys "internal" and - "default_output" to formats from parformats. - metadict - Dictionary mapping string keys to additional arguments - required by function. - base - A basefunction subclass instance which this one decorates with - additional functionality. - Cache - A class (not instance) which implements caching of BaseFunction - evaluations. + ---------- + parameterdict : dict + The dictionary mapping string keys (e.g. "position") + to their index in a sequence of parameters for this + PeakFunction subclass. Every parameter must appear. + parformats : array-like + The sequence of strings containing all allowed input/output + formats defined for the function's parameters. + default_formats : dict + The dictionary mapping the string keys "default_input" and + "default_output" to formats from parformats. + metadict : dict + The dictionary mapping string keys to additional arguments + required by function. + base : basefunction subclass + The basefunction subclass instance which this one decorates with + additional functionality. + Cache : class + The class (not instance) which implements caching of BaseFunction + evaluations. """ self.parameterdict = parameterdict self.npars = len(self.parameterdict) @@ -90,29 +111,28 @@ def __init__(self, parameterdict, parformats, default_formats, metadict, base=No # arbitrary types, parameters are indexed by these keys as well as # integer indices. Restricting keys to strings keeps things sane. for p in self.parameterdict.keys(): - if type(p) not in (str, unicode): + if not isinstance(p, str): emsg = "Argument parameterdict's keys must be strings." raise ValueError(emsg) - vals = self.parameterdict.values() - vals.sort() - if vals != range(self.npars): + vals = sorted(self.parameterdict.values()) + if vals != list(range(self.npars)): - emsg = "Argument parameterdict's values must uniquely specify "+\ - "the index of each parameter defined by its keys." + emsg = ( + "Argument parameterdict's values must uniquely specify " + + "the index of each parameter defined by its keys." + ) raise ValueError(emsg) self.parformats = parformats # Check validity of default_formats self.default_formats = default_formats - if not ("default_input" in self.default_formats and - "default_output" in self.default_formats): - emsg = "Argument default_formats must specify 'default_input' "+\ - "and 'default_output' as keys." + if not ("default_input" in self.default_formats and "default_output" in self.default_formats): + emsg = "Argument default_formats must specify 'default_input' " + "and 'default_output' as keys." raise ValueError(emsg) for f in self.default_formats.values(): - if not f in self.parformats: - emsg = "Keys of argument default_formats must map to a "+\ - "value within argument parformats."
+ if f not in self.parformats: + emsg = "Keys of argument default_formats must map to a " + "value within argument parformats." raise ValueError() # Set metadictionary @@ -126,13 +146,12 @@ def __init__(self, parameterdict, parformats, default_formats, metadict, base=No # of PeakFunction. # Object to cache: (basefunctioninstance, tuple of parameters) if Cache is not None: - #self.value = Cache(self.value, "value") - #self.jacobian = Cache(self.jacobian, "jacobian") + # self.value = Cache(self.value, "value") + # self.jacobian = Cache(self.jacobian, "jacobian") pass return - - #### "Virtual" class methods #### + # "Virtual" class methods #### def actualize(self, *args, **kwds): """Create ModelPart instance of self with given parameters. ("Virtual" method)""" @@ -164,23 +183,25 @@ def _valueraw(self, *args, **kwds): emsg = "_valueraw must() be implemented in a BaseFunction subclass." raise NotImplementedError(emsg) - - #### Class methods #### + # Class methods #### def jacobian(self, p, r, rng=None): """Calculate jacobian of p, possibly restricted by range. Parameters - p - The ModelPart to be evaluated - r - sequence or scalar over which function is evaluated - rng - Optional slice object restricts which r-values are evaluated. - The output has same length as r, but unevaluated objects have - a default value of 0. If caching is enabled these may be - previously calculated values instead. + ---------- + p : ModelPart instance + The ModelPart to be evaluated + r :array-like + sequence or scalar over which function is evaluated + rng : slice object + Optional slice object restricts which r-values are evaluated. + The output has same length as r, but unevaluated objects have + a default value of 0. If caching is enabled these may be + previously calculated values instead. """ if self is not p._owner: - emsg = "Argument 'p' must be evaluated by the BaseFunction "+\ - "subclass which owns it." + emsg = "Argument 'p' must be evaluated by the BaseFunction " + "subclass which owns it." raise ValueError(emsg) # normally r will be a sequence, but also allow single numeric values @@ -192,7 +213,7 @@ def jacobian(self, p, r, rng=None): output = [None for j in jac] for idx in range(len(output)): if jac[idx] is not None: - output[idx] = r * 0. + output[idx] = r * 0.0 output[idx][rng] = jac[idx] return output except TypeError: @@ -201,10 +222,19 @@ def jacobian(self, p, r, rng=None): def transform_derivatives(self, pars, in_format=None, out_format=None): """Return gradient matrix for pars converted from in_format to out_format. - Parameters - pars - Sequence of parameters - in_format - A format defined for this class - out_format - A format defined for this class + Parameters + ---------- + pars : array-like + The sequence of parameters + in_format : str + The format defined for this class + out_format : str + The format defined for this class + + Returns + ------- + array-like + The gradient matrix for pars converted from in_format to out_format. """ # Map unspecified formats to specific formats defined in default_formats if in_format is None: @@ -222,12 +252,10 @@ def transform_derivatives(self, pars, in_format=None, out_format=None): elif out_format == "default_input": out_format = self.default_formats["default_input"] - if not in_format in self.parformats: - raise ValueError("Argument 'in_format' must be one of %s." \ - % self.parformats) - if not out_format in self.parformats: - raise ValueError("Argument 'out_format' must be one of %s." 
\ - % self.parformats) + if in_format not in self.parformats: + raise ValueError("Argument 'in_format' must be one of %s." % self.parformats) + if out_format not in self.parformats: + raise ValueError("Argument 'out_format' must be one of %s." % self.parformats) if in_format == out_format: return np.identity(self.npars) return self._transform_derivativesraw(pars, in_format=in_format, out_format=out_format) @@ -235,13 +263,22 @@ def transform_derivatives(self, pars, in_format=None, out_format=None): def transform_parameters(self, pars, in_format=None, out_format=None): """Return new sequence with pars converted from in_format to out_format. - Also restores parameters to a preferred range if it permits multiple - values that correspond to the same physical result. + Also restores parameters to a preferred range if it permits multiple + values that correspond to the same physical result. + + Parameters + ---------- + pars : array-like + The sequence of parameters + in_format : str + The format defined for this class + out_format : str + The format defined for this class - Parameters - pars - Sequence of parameters - in_format - A format defined for this class - out_format - A format defined for this class + Returns + ------- + array-like + The new sequence of parameters with out_format. """ # Map unspecified formats to specific formats defined in default_formats if in_format is None: @@ -259,31 +296,36 @@ def transform_parameters(self, pars, in_format=None, out_format=None): elif out_format == "default_input": out_format = self.default_formats["default_input"] - if not in_format in self.parformats: - raise ValueError("Argument 'in_format' must be one of %s." \ - % self.parformats) - if not out_format in self.parformats: - raise ValueError("Argument 'out_format' must be one of %s." \ - % self.parformats) - #if in_format == out_format: + if in_format not in self.parformats: + raise ValueError("Argument 'in_format' must be one of %s." % self.parformats) + if out_format not in self.parformats: + raise ValueError("Argument 'out_format' must be one of %s." % self.parformats) + # if in_format == out_format: # return pars return self._transform_parametersraw(pars, in_format=in_format, out_format=out_format) - def value(self, p, r, rng=None): """Calculate value of ModelPart over r, possibly restricted by range. Parameters - p - The ModelPart to be evaluated - r - sequence or scalar over which function is evaluated - rng - Optional slice object restricts which r-values are evaluated. - The output has same length as r, but unevaluated objects have - a default value of 0. If caching is enabled these may be - previously calculated values instead. + ---------- + p : ModelPart instance + The ModelPart to be evaluated + r : array-like or float + The sequence or scalar over which function is evaluated + rng : slice object + Optional slice object restricts which r-values are evaluated. + The output has same length as r, but unevaluated objects have + a default value of 0. If caching is enabled these may be + previously calculated values instead. + + Returns + ------- + array-like + The value of ModelPart over r, possibly restricted by range. """ if self is not p._owner: - emsg = "Argument 'p' must be evaluated by the BaseFunction "+\ - "subclass which owns it." + emsg = "Argument 'p' must be evaluated by the BaseFunction " + "subclass which owns it." 
raise ValueError(emsg) # normally r will be a sequence, but also allow single numeric values @@ -291,7 +333,7 @@ def value(self, p, r, rng=None): if rng is None: rng = slice(0, len(r)) rpart = r[rng] - output = r * 0. + output = r * 0.0 output[rng] = self._valueraw(p.pars, rpart) return output except TypeError: @@ -312,11 +354,16 @@ def pgradient(self, p, format): In the trivial case where format="internal", returns an identity matrix. Parameters - p - A ModelPart - format - The format of the parameters + ---------- + p : ModelPart instance + The ModelPart instance to be evaluated for gradient calculation. + format : str + The format of the parameters Returns - A 2D array containing the partial derivatives. + ------- + array-like + A 2D array containing the partial derivatives. """ return @@ -331,24 +378,30 @@ def writestr(self, baselist): in baselist. Parameters - baselist - List of BaseFunction (or subclass) instances. + ---------- + baselist : array-like + The list of BaseFunction (or subclass) instances. + + Returns + ------- + The string representation of self. """ if self.base is not None and self.base not in baselist: emsg = "baselist does not include this BaseFunction's base function." - raise ValueError("emsg") + raise ValueError(emsg) lines = [] # Write function type - lines.append("function=%s" %repr(self.__class__.__name__)) - lines.append("module=%s" %repr(self.getmodule())) + lines.append("function=%s" % repr(self.__class__.__name__)) + lines.append("module=%s" % repr(self.getmodule())) # Write base if self.base is not None: - lines.append("base=%s" %repr(baselist.index(self.base))) + lines.append("base=%s" % repr(baselist.index(self.base))) else: - lines.append("base=%s" %repr(None)) + lines.append("base=%s" % repr(None)) # Write all other metadata - for k, (v, f) in self.metadict.iteritems(): - lines.append("%s=%s" %(k, f(v))) - datastring = "\n".join(lines)+"\n" + for k, (v, f) in self.metadict.items(): + lines.append("%s=%s" % (k, f(v))) + datastring = "\n".join(lines) + "\n" return datastring @staticmethod @@ -359,27 +412,34 @@ def factory(functionstr, baselist): index of that instance in baselist. Parameters - functionstr - The string representation of the BaseFunction instance - baselist - List of BaseFunction (or subclass) instances. + ---------- + functionstr : str + The string representation of the BaseFunction instance + baselist : array-like + The list of BaseFunction (or subclass) instances. + + Returns + Basefunction instance + The BaseFunction instance based on the parameter strings """ data = functionstr.splitlines() data = "\n".join(data) # populate dictionary with parameter definition # "key=value"->{"key":"value"} - data = re.split(r'(?:[\r\n]+|\A)(\S+)=', data) + data = re.split(r"(?:[\r\n]+|\A)(\S+)=", data) ddict = {} - for i in range(len(data)/2): - ddict[data[2*i+1]] = data[2*i+2] + for i in range(len(data) // 2): + ddict[data[2 * i + 1]] = data[2 * i + 2] # dictionary of parameters pdict = {} - for (k, v) in ddict.items(): + for k, v in ddict.items(): try: pdict[k] = eval(v) - except Exception, e: + except Exception as e: logger.exception(e) - emsg = ("Invalid parameter: %s=%s" %(k,v)) + emsg = "Invalid parameter: %s=%s" % (k, v) raise SrMiseDataFormatError(emsg) function_name = pdict["function"] @@ -428,8 +488,16 @@ def safefunction(f, fsafe): Does not handle circular dependencies. 
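The re.split tokenization inside factory() is terse, so here is the parsing step run on an invented three-line input; the regular expression and the index arithmetic are the ones from the method above:

import re

functionstr = 'function="GaussianOverR"\nmodule="diffpy.srmise.peaks.gaussianoverr"\nbase=None'
data = re.split(r"(?:[\r\n]+|\A)(\S+)=", functionstr)
# data[0] is an empty preamble; keys sit at odd indices and values follow them:
# ['', 'function', '"GaussianOverR"', 'module', '"diffpy.srmise.peaks.gaussianoverr"', 'base', 'None']
ddict = {data[2 * i + 1]: data[2 * i + 2] for i in range(len(data) // 2)}
# Values are still strings here; factory() passes each through eval() afterwards.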
Parameters - f: A BaseFunction instance - fsafe: List of BaseFunction instances being built.""" + ---------- + f : BaseFunction instance + The BaseFunction instance + fsafe : array-like + The list of BaseFunction instances being built. + + Returns + ------- + None + """ if f not in fsafe: if f.base is not None: BaseFunction.safefunction(f.base, fsafe) @@ -438,11 +506,12 @@ def safefunction(f, fsafe): return -#end of class BaseFunction +# end of class BaseFunction -if __name__ == '__main__': +if __name__ == "__main__": - from diffpy.srmise.peaks import GaussianOverR, TerminationRipples + from diffpy.srmise.peaks.gaussianoverr import GaussianOverR + from diffpy.srmise.peaks.terminationripples import TerminationRipples p = GaussianOverR(0.8) outstr = p.writestr([]) @@ -451,7 +520,7 @@ def safefunction(f, fsafe): pt = TerminationRipples(p, 20) outstr2 = pt.writestr([p]) - print outstr + print(outstr) pt2 = BaseFunction.factory(outstr2, [p]) - print type(pt2) + print(type(pt2)) diff --git a/diffpy/srmise/applications/__init__.py b/src/diffpy/srmise/baselines/__init__.py similarity index 81% rename from diffpy/srmise/applications/__init__.py rename to src/diffpy/srmise/baselines/__init__.py index e9f32f6..5540acb 100644 --- a/diffpy/srmise/applications/__init__.py +++ b/src/diffpy/srmise/baselines/__init__.py @@ -2,7 +2,8 @@ ############################################################################## # # SrMise by Luke Granlund -# (c) 2014 trustees of the Michigan State University. +# (c) 2014 trustees of the Michigan State University +# (c) 2024 trustees of Columbia University in the City of New York # All rights reserved. # # File coded by: Luke Granlund diff --git a/diffpy/srmise/baselines/arbitrary.py b/src/diffpy/srmise/baselines/arbitrary.py similarity index 56% rename from diffpy/srmise/baselines/arbitrary.py rename to src/diffpy/srmise/baselines/arbitrary.py index 2588740..80c1055 100644 --- a/diffpy/srmise/baselines/arbitrary.py +++ b/src/diffpy/srmise/baselines/arbitrary.py @@ -2,7 +2,8 @@ ############################################################################## # # SrMise by Luke Granlund -# (c) 2014 trustees of the Michigan State University. +# (c) 2014 trustees of the Michigan State University +# (c) 2024 trustees of Columbia University in the City of New York # All rights reserved. # # File coded by: Luke Granlund @@ -13,16 +14,16 @@ import logging -import matplotlib.pyplot as plt import numpy as np -import diffpy.srmise.srmiselog from diffpy.srmise.baselines.base import BaselineFunction +from diffpy.srmise.baselines.polynomial import Polynomial from diffpy.srmise.srmiseerrors import SrMiseEstimationError logger = logging.getLogger("diffpy.srmise") -class Arbitrary (BaselineFunction): + +class Arbitrary(BaselineFunction): """Methods for evaluating a baseline from an arbitrary function. Supports baseline calculations with arbitrary functions. These functions, @@ -42,15 +43,18 @@ def __init__(self, npars, valuef, jacobianf=None, estimatef=None, Cache=None): """Initialize an arbitrary baseline. Parameters - npars: Number of parameters which define the function - valuef: Function which calculates the value of the baseline - at x. - jacobianf: (None) Function which calculates the Jacobian of the + ---------- + npars : int + The number of parameters which define the function + valuef : callable + The function which calculates the value of the baseline at x.
+ jacobianf : array-like or None + The function which calculates the Jacobian of the baseline function with respect to free pars. - estimatef: (None) Function which estimates function parameters given the - data x and y. - Cache: (None) A class (not instance) which implements caching of - BaseFunction evaluations. + estimatef : array-like or None + The function which estimates function parameters given the data x and y. + Cache : None or callable + The class (not instance) which implements caching of BaseFunction evaluations. """ # Guarantee valid number of parameters try: @@ -64,10 +68,10 @@ def __init__(self, npars, valuef, jacobianf=None, estimatef=None, Cache=None): # Define parameterdict # e.g. {"a_0":0, "a_1":1, "a_2":2, "a_3":3} if npars is 4. parameterdict = {} - for d in range(self.testnpars+1): - parameterdict["a_"+str(d)] = d - formats = ['internal'] - default_formats = {'default_input':'internal', 'default_output':'internal'} + for d in range(testnpars + 1): + parameterdict["a_" + str(d)] = d + formats = ["internal"] + default_formats = {"default_input": "internal", "default_output": "internal"} # Check that the provided functions are at least callable if valuef is None or callable(valuef): @@ -88,24 +92,31 @@ def __init__(self, npars, valuef, jacobianf=None, estimatef=None, Cache=None): # TODO: figure out how the metadict can be used to save the functions # and use them again when a file is loaded... - metadict = {} - metadict["npars"] = (npars, repr) - metadict["valuef"] = (valuef, repr) - metadict["jacobianf"] = (jacobianf, repr) - metadict["estimatef"] = (estimatef, repr) + metadict = { + "npars": (npars, repr), + "valuef": (valuef, repr), + "jacobianf": (jacobianf, repr), + "estimatef": (estimatef, repr), + } BaselineFunction.__init__(self, parameterdict, formats, default_formats, metadict, None, Cache) - #### Methods required by BaselineFunction #### + # Methods required by BaselineFunction #### def estimate_parameters(self, r, y): """Estimate parameters for data baseline. Parameters - r: (Numpy array) Data along r from which to estimate - y: (Numpy array) Data along y from which to estimate + ---------- + r : array-like + The data along r from which to estimate + y : array-like + The data along y from which to estimate + + Returns + ------- + The numpy array of parameters in the default internal format. - Returns Numpy array of parameters in the default internal format. - Raises NotImplementedError if no estimation routine is defined, and + we raise NotImplementedError if no estimation routine is defined, and SrMiseEstimationError if parameters cannot be estimated for any other.""" if self.estimatef is None: emsg = "No estimation routine provided to Arbitrary." @@ -114,33 +125,42 @@ def estimate_parameters(self, r, y): # TODO: check that estimatef returns something proper? try: return self.estimatef(r, y) - except Exception, e: - emsg = "Error within estimation routine provided to Arbitrary:\n"+\ - str(e) + except Exception as e: + emsg = "Error within estimation routine provided to Arbitrary:\n" + str(e) raise SrMiseEstimationError(emsg) def _jacobianraw(self, pars, r, free): """Return the Jacobian of a polynomial. Parameters - pars: Sequence of parameters - pars[0] = a_0 - pars[1] = a_1 - ... - r: sequence or scalar over which pars is evaluated - free: sequence of booleans which determines which derivatives are - needed. True for evaluation, False for no evaluation.""" + ---------- + pars : array-like + The sequence of parameters + pars[0] = a_0 + pars[1] = a_1 + ... 
+ r : array-like or float + The sequence or scalar over which pars is evaluated + free : array-like of bools + The sequence of booleans which determines which derivatives are needed. + True for evaluation, False for no evaluation. + + Returns + ------- + numpy.ndarray + The Jacobian of polynomial with respect to free pars. + """ nfree = None if self.jacobianf is None: - nfree = (pars == True).sum() + nfree = np.sum(free) if nfree != 0: emsg = "No jacobian routine provided to Arbitrary." raise NotImplementedError(emsg) if len(pars) != self.npars: - emsg = "Argument pars must have "+str(self.npars)+" elements." + emsg = "Argument pars must have " + str(self.npars) + " elements." raise ValueError(emsg) if len(free) != self.npars: - emsg = "Argument free must have "+str(self.npars)+" elements." + emsg = "Argument free must have " + str(self.npars) + " elements." raise ValueError(emsg) # Allow an arbitrary function without a Jacobian provided act as @@ -149,7 +169,7 @@ def _jacobianraw(self, pars, r, free): # large performance implications if all other functions used while # fitting a function define a Jacobian. if nfree == 0: - return [None for p in range(len(par))] + return [None for p in range(len(pars))] # TODO: check that jacobianf returns something proper? return self.jacobianf(pars, r, free) @@ -158,41 +178,63 @@ def _transform_parametersraw(self, pars, in_format, out_format): """Convert parameter values from in_format to out_format. Parameters - pars: Sequence of parameters - in_format: A format defined for this class - out_format: A format defined for this class - - Defined Formats - internal: [a_0, a_1, ...]""" + ---------- + pars : array-like + The sequence of parameters + in_format : str + The format defined for this class + out_format : str + The format defined for this class + + Defined Formats + --------------- + internal: [a_0, a_1, ...] + + Returns + ------- + numpy.ndarray + The standard output of transformed parameters + """ temp = np.array(pars) # Convert to intermediate format "internal" if in_format == "internal": pass else: - raise ValueError("Argument 'in_format' must be one of %s." \ - % self.parformats) + raise ValueError("Argument 'in_format' must be one of %s." % self.parformats) # Convert to specified output format from "internal" format. if out_format == "internal": pass else: - raise ValueError("Argument 'out_format' must be one of %s." \ - % self.parformats) + raise ValueError("Argument 'out_format' must be one of %s." % self.parformats) return temp def _valueraw(self, pars, r): - """Return value of polynomial for the given parameters and r values. + """Compute the value of the polynomial given a set of parameters and evaluation points. + + This method ensures that the input parameters conform to the expected count + and then delegates the computation to an internal method `valuef`. Parameters - Parameters - pars: Sequence of parameters - pars[0] = a_0 - pars[1] = a_1 - ... - r: sequence or scalar over which pars is evaluated""" + ---------- + pars : array_like + The sequence of coefficients for the polynomial where each element corresponds to: + - pars[0] = a_0, the constant term + - pars[1] = a_1, the coefficient of the first degree term, and so on. + The length of `pars` must match the expected number of parameters defined in the class. + + r : array_like or float + The sequence of points or a single point at which the polynomial is to be evaluated. + If a scalar is provided, it will be treated as a single point for evaluation.
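For orientation, this is how a caller would wire plain Python callables into the class; the linear valuef/jacobianf pair below is invented for illustration, while the constructor signature is the one defined earlier in this file:

import numpy as np

from diffpy.srmise.baselines.arbitrary import Arbitrary

def valuef(pars, r):
    # Hypothetical linear baseline: a_0 + a_1 * r.
    return pars[0] + pars[1] * np.asarray(r)

def jacobianf(pars, r, free):
    # One entry per parameter; None where no derivative was requested.
    r = np.asarray(r)
    return [np.ones_like(r) if free[0] else None, r if free[1] else None]

baseline_func = Arbitrary(2, valuef, jacobianf=jacobianf)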
+ + Returns + ------- + ndarray or float + The computed values of the polynomial for each point in `r`. + """ if len(pars) != self.npars: - emsg = "Argument pars must have "+str(self.npars)+" elements." + emsg = "Argument pars must have " + str(self.npars) + " elements." raise ValueError(emsg) # TODO: check that valuef returns something proper? @@ -201,21 +243,22 @@ def _valueraw(self, pars, r): def getmodule(self): return __name__ -#end of class Polynomial + +# end of class Polynomial # simple test code -if __name__ == '__main__': +if __name__ == "__main__": - f = Polynomial(degree = 3) + f = Polynomial(degree=3) r = np.arange(5) pars = np.array([3, 0, 1, 2]) free = np.array([True, False, True, True]) - print f._valueraw(pars, r) - print f._jacobianraw(pars, r, free) + print(f._valueraw(pars, r)) + print(f._jacobianraw(pars, r, free)) - f = Polynomial(degree = -1) + f = Polynomial(degree=-1) r = np.arange(5) pars = np.array([]) free = np.array([]) - print f._valueraw(pars, r) - print f._jacobianraw(pars, r, free) + print(f._valueraw(pars, r)) + print(f._jacobianraw(pars, r, free)) diff --git a/src/diffpy/srmise/baselines/base.py b/src/diffpy/srmise/baselines/base.py new file mode 100644 index 0000000..c1ecf0e --- /dev/null +++ b/src/diffpy/srmise/baselines/base.py @@ -0,0 +1,208 @@ +#!/usr/bin/env python +############################################################################## +# +# SrMise by Luke Granlund +# (c) 2014 trustees of the Michigan State University +# (c) 2024 trustees of Columbia University in the City of New York +# All rights reserved. +# +# File coded by: Luke Granlund +# +# See LICENSE.txt for license information. +# +############################################################################## + +import logging + +import numpy as np + +from diffpy.srmise.basefunction import BaseFunction +from diffpy.srmise.modelparts import ModelPart +from diffpy.srmise.srmiseerrors import SrMiseDataFormatError + +logger = logging.getLogger("diffpy.srmise") + + +class BaselineFunction(BaseFunction): + """Base class for functions which represent some data's baseline term. + + Class members + ------------- + parameterdict: dict + The dictionary mapping string keys to their index in the + sequence of parameters. These keys apply only to + the default "internal" format. + parformats: array-like + The sequence of strings defining what formats are recognized + by a baseline function. + default_formats: dict + The dictionary which maps the strings "default_input" and + "default_output" to strings also appearing in parformats. + "default_input"-> format used internally within the class + "default_output"-> Default format to use when converting + parameters for outside use. + + Class methods (implemented by inheriting classes) + ------------------------------------------------- + estimate_parameters() (optional) + _jacobianraw() (optional, but strongly recommended) + _transform_derivativesraw() (optional, supports propagation of uncertainty for different paramaterizations) + _transform_parametersraw() + _valueraw() + + Class methods + ------------- + actualize() + + Inherited methods + ----------------- + jacobian() + value() + transform_derivatives() + transform_parameters() + """ + + def __init__( + self, + parameterdict, + parformats, + default_formats, + metadict, + base=None, + Cache=None, + ): + """Set parameterdict defined by subclass + + parameterdict : dict + The dictionary mapping string keys to their index in a + sequence of parameters for this BaselineFunction subclass. 
+ parformats : array-like + The sequence strings containing all allowed input/output + formats defined for the peak function's parameters. + default_formats : dict + The dictionary mapping the string keys "internal" and + default_output" to formats from parformats. + metadict: dict + The dictionary mapping string keys to tuple (v, m) where v is an + additional argument required by function, and m is a method + whose string output recreates v when passed to eval(). + base : The basefunction subclass + The basefunction subclass instance which this one decorates with + additional functionality. + Cache : class + The class (not instance) which implements caching of BaseFunction + evaluations.""" + BaseFunction.__init__(self, parameterdict, parformats, default_formats, metadict, base, Cache) + + # "Virtual" class methods #### + + # Methods required by BaseFunction #### + + def actualize( + self, + pars, + in_format="default_input", + free=None, + removable=False, + static_owner=False, + ): + converted = self.transform_parameters(pars, in_format, out_format="internal") + return Baseline(self, converted, free, removable, static_owner) + + def getmodule(self): + return __name__ + + +# end of class BaselineFunction + + +class Baseline(ModelPart): + """Represents a baseline associated with a BaselineFunction subclass.""" + + def __init__(self, owner, pars, free=None, removable=False, static_owner=False): + """Initialize the BaselineComponent instance with specified configurations. + + Parameters + ---------- + owner : BaselineFunction subclass instance + The owner object which is an instance of a subclass of BaselineFunction. + pars : array-like + The sequence of parameters defining the characteristics of the baseline. + free : Sequence of bool, optional + The sequence parallel to `pars` where each boolean value indicates whether + the corresponding parameter is adjustable. If False, that parameter is fixed. + Defaults to None, implying all parameters are free by default. + removable : bool, optional + A flag indicating whether the baseline can be removed during processing. + Defaults to False. + static_owner : bool, optional + Determines if the owner of the baseline can be altered using the + ` changeowner()` method. Defaults to False. + + Notes + ----- + - The `free` and `removable` parameters are independent; a baseline can be marked + as removable even if some of its parameters are fixed (`free` is False). In such + cases, the baseline may be removed during peak extraction, but the fixed + parameters will persist until removal. + """ + ModelPart.__init__(self, owner, pars, free, removable, static_owner) + + @staticmethod + def factory(baselinestr, ownerlist): + """Instantiate a Peak from a string. + + Parameters + ---------- + baselinestr : str + The string representing Baseline + ownerlist : array-like + The list of BaseFunctions that owner is in + """ + + data = baselinestr.strip().splitlines() + + # dictionary of parameters + pdict = {} + for d in data: + result = d.split("=", 1) + if len(result) == 2: + try: + pdict[result[0]] = eval(result[1]) + except Exception: + emsg = "Invalid parameter: %s" % d + raise SrMiseDataFormatError(emsg) + else: + emsg = "Invalid parameter: %s" % d + raise SrMiseDataFormatError(emsg) + + # Correctly initialize the base function, if one exists. + idx = pdict["owner"] + if idx > len(ownerlist): + emsg = "Dependent base function not in ownerlist." 
+ raise ValueError(emsg) + pdict["owner"] = ownerlist[idx] + + return Baseline(**pdict) + + +# End of class Baseline + +# simple test code +if __name__ == "__main__": + + from numpy.random import randn + + from diffpy.srmise.modelevaluators.aicc import AICc + from diffpy.srmise.peaks.base import Peaks + from diffpy.srmise.peaks.gaussianoverr import GaussianOverR + + res = 0.01 + r = np.arange(2, 4, res) + err = np.ones(len(r)) # default unknown errors + pf = GaussianOverR(0.7) + evaluator = AICc() + + pars = [[3, 0.2, 10], [3.5, 0.2, 10]] + ideal_peaks = Peaks([pf.actualize(p, "pwa") for p in pars]) + y = ideal_peaks.value(r) + 0.1 * randn(len(r)) diff --git a/src/diffpy/srmise/baselines/fromsequence.py b/src/diffpy/srmise/baselines/fromsequence.py new file mode 100644 index 0000000..1866441 --- /dev/null +++ b/src/diffpy/srmise/baselines/fromsequence.py @@ -0,0 +1,277 @@ +#!/usr/bin/env python +############################################################################## +# +# SrMise by Luke Granlund +# (c) 2014 trustees of the Michigan State University +# (c) 2024 trustees of Columbia University in the City of New York +# All rights reserved. +# +# File coded by: Luke Granlund +# +# See LICENSE.txt for license information. +# +############################################################################## + +import logging + +import numpy as np +import scipy.interpolate as spi + +from diffpy.srmise.baselines.base import BaselineFunction + +logger = logging.getLogger("diffpy.srmise") + + +class FromSequence(BaselineFunction): + """Methods for evaluation of a baseline from discrete data via interpolation. + + FromSequence uses cubic spline interpolation (no smoothing) on discrete + points to approximate the baseline at arbitrary points within the + interpolation domain. This baseline function permits no free parameters.""" + + def __init__(self, *args, **kwds): + """Initialize a baseline object based on input sequences `x` and `y`. + + This class provides two ways to initialize: by directly providing the sequences or by + specifying a file that contains the sequences. + + Parameters + ---------- + *args : tuple + The variable length argument list. Can be used to pass `x` and `y` sequences directly. + + **kwds : dict + The arbitrary keyword arguments. Can be used to specify `x`, `y` sequences or a `file` name. + + x : array_like, optional + The sequence of x-values defining the baseline. Can be passed as a positional argument or via keyword. + + y : array_like, optional + The sequence of y-values defining the baseline. Must be provided alongside `x`. + + file : str, optional + The name of the file containing two columns: one for x-values and one for y-values. + + Usage + ----- + 1. Directly with sequences: + - `FromSequence(xlist, ylist)` + - `FromSequence(x=xlist, y=ylist)` + + 2. From a file: + - `FromSequence("filename")` + - `FromSequence(file="filename")` + """ + if len(args) == 1 and len(kwds) == 0: + # load from file + x, y = self.readxy(args[0]) + elif len(args) == 0 and ("file" in kwds and "x" not in kwds and "y" not in kwds): + # load file + x, y = self.readxy(kwds["file"]) + elif len(args) == 2 and len(kwds) == 0: + # Load x, y directly from arguments + x = args[0] + y = args[1] + elif len(args) == 0 and ("x" in kwds and "y" in kwds and "file" not in kwds): + # Load x, y from keywords + x = kwds["x"] + y = kwds["y"] + else: + emsg = "Call to FromSequence does not match any allowed signature." 
+ raise TypeError(emsg) + + # Guarantee valid lengths + if len(x) != len(y): + emsg = "Sequences x and y must have the same length." + raise ValueError(emsg) + parameterdict = {} + formats = ["internal"] + default_formats = {"default_input": "internal", "default_output": "internal"} + self.spline = spi.InterpolatedUnivariateSpline(x, y) + self.minx = x[0] + self.maxx = x[-1] + metadict = {} + metadict["x"] = (x, self.xyrepr) + metadict["y"] = (y, self.xyrepr) + BaselineFunction.__init__(self, parameterdict, formats, default_formats, metadict, None, Cache=None) + + # Methods required by BaselineFunction #### + + def estimate_parameters(self, r, y): + """Return empty numpy array. + + A FromSequence object has no free parameters, so there is nothing + to estimate. + + Parameters + ---------- + r : array-like + The data along r from which to estimate, Ignored + y : array-like + The data along y from which to estimate, Ignored + + Returns + ------- + array-like + The empty numpy array + """ + return np.array([]) + + def _jacobianraw(self, pars, r, free): + """Return []. + + A FromSequence baseline has no parameters. + + Parameters + ---------- + pars :array-like + The empty sequence + r : array-like + The sequence or scalar over which pars is evaluated + free : array-like + The empty sequence. + + Returns + ------- + array-like + The empty numpy array + """ + if len(pars) != self.npars: + emsg = "Argument pars must have " + str(self.npars) + " elements." + raise ValueError(emsg) + if len(free) != self.npars: + emsg = "Argument free must have " + str(self.npars) + " elements." + raise ValueError(emsg) + return [] + + def _transform_parametersraw(self, pars, in_format, out_format): + """Convert parameter values from in_format to out_format. + + Parameters + ---------- + pars : array-like + The sequence of parameters + in_format : str + The format defined for this class + out_format : str + The format defined for this class + + Defined Formats + --------------- + n/a, FromSequence has no parameters + + Returns + ------- + array-like + The sequence of parameters converted to out_format + """ + temp = np.array(pars) + + # Convert to intermediate format "internal" + if in_format == "internal": + pass + else: + raise ValueError("Argument 'in_format' must be one of %s." % self.parformats) + + # Convert to specified output format from "internal" format. + if out_format == "internal": + pass + else: + raise ValueError("Argument 'out_format' must be one of %s." % self.parformats) + return temp + + def _valueraw(self, pars, r): + """Return value of polynomial for the given parameters and r values. + + Parameters + ---------- + pars : array-like + The empty sequence + r : array-like + The sequence or scalar over which pars is evaluated + + Returns + ------- + float + The value of the polynomial for the given parameters + """ + if len(pars) != self.npars: + emsg = "Argument pars must have " + str(self.npars) + " elements." 
+ raise ValueError(emsg) + try: + if r[0] < self.minx or r[-1] > self.maxx: + logger.warning( + "Warning: Evaluating interpolating function over %s, outside safe range of %s.", + [r[0], r[-1]], + [self.minx, self.maxx], + ) + except (IndexError, TypeError): + if r < self.minx or r > self.maxx: + logger.warning( + "Warning: Evaluating interpolating function at %s, outside safe range of %s.", + r, + [self.minx, self.maxx], + ) + return self.spline(r) + + def getmodule(self): + return __name__ + + def xyrepr(self, var): + """Safe string output of x and y, compatible with eval() + + Parameters + ---------- + var : array-like + The sequence or scalar over which to evaluate + + Returns + ------- + str + The x and y values of the given variable + """ + return "[%s]" % ", ".join([repr(v) for v in var]) + + def readxy(self, filename): + """Read two columns of numbers from file filename and return them as a pair of numpy arrays.""" + from diffpy.srmise.srmiseerrors import SrMiseDataFormatError + + # TODO: Make this safer + try: + datastring = open(filename, "rb").read() + except Exception as err: + raise err + + import re + + res = re.search(rb"^[^#]", datastring, re.M) + if res: + datastring = datastring[res.end() :].strip() + + x = [] + y = [] + + try: + for line in datastring.split(b"\n"): + v = line.split() + x.append(float(v[0])) + y.append(float(v[1])) + except (ValueError, IndexError) as err: + raise SrMiseDataFormatError(str(err)) + + return (np.array(x), np.array(y)) + + +# end of class FromSequence + +# simple test code +if __name__ == "__main__": + + r = np.arange(0, 9.42413, 0.2) + b = -(np.tanh(0.5 * r) + np.sin(0.5 * r)) + f = FromSequence(r, b) + pars = np.array([]) + free = np.array([]) + + r2 = np.arange(0, 9.42413, 0.5) + b2 = f._valueraw(pars, r2) diff --git a/src/diffpy/srmise/baselines/nanospherical.py b/src/diffpy/srmise/baselines/nanospherical.py new file mode 100644 index 0000000..a76719f --- /dev/null +++ b/src/diffpy/srmise/baselines/nanospherical.py @@ -0,0 +1,318 @@ +#!/usr/bin/env python +############################################################################## +# +# SrMise by Luke Granlund +# (c) 2014 trustees of the Michigan State University +# (c) 2024 trustees of Columbia University in the City of New York +# All rights reserved. +# +# File coded by: Luke Granlund +# +# See LICENSE.txt for license information. +# +############################################################################## + +import logging + +import numpy as np + +from diffpy.srmise.baselines.base import BaselineFunction + +logger = logging.getLogger("diffpy.srmise") + + +class NanoSpherical(BaselineFunction): + """Methods for evaluation of baseline of spherical nanoparticle of uniform density. + + Allowed formats are + internal: [scale, radius] + + Given nanoparticle radius R, the baseline is -scale*r*(1-(3r)/(4R)+(r^3)/(16*R^3)) in the + interval (0, 2*abs(R)), and 0 elsewhere. Internally, both scale and radius are unconstrained, + but negative values are mapped to their physically meaningful positive equivalents. + + The expression in parentheses is gamma_0(r) for a sphere. For a well normalized PDF the + scale factor is 4*pi*rho_0, where rho_0 is the nanoparticle density. + + gamma_0(r) Reference: + Guinier et al. (1955). Small-angle Scattering from X-rays. New York: John Wiley & Sons, Inc. + """ + + def __init__(self, Cache=None): + """Initialize a spherical nanoparticle baseline. + + Parameters + ---------- + Cache : class + The class (not instance) which implements caching of BaseFunction + evaluations.
+ """ + # Define parameterdict + parameterdict = {"scale": 0, "radius": 1} + formats = ["internal"] + default_formats = {"default_input": "internal", "default_output": "internal"} + metadict = {} + BaselineFunction.__init__(self, parameterdict, formats, default_formats, metadict, None, Cache) + + # Methods required by BaselineFunction #### + + # def estimate_parameters(self, r, y): + # """Estimate parameters for spherical baseline. (Not implemented!) + # + # Parameters + # r - array along r from which to estimate + # y - array along y from which to estimate + # + # Returns Numpy array of parameters in the default internal format. + # Raises NotImplementedError if estimation is not implemented for this + # degree, or SrMiseEstimationError if parameters cannot be estimated for + # any other reason. + # """ + # if len(r) != len(y): + # emsg = "Arrays r, y must have equal length." + # raise ValueError(emsg) + + def _jacobianraw(self, pars, r, free): + """Return the Jacobian of the spherical baseline. + + Parameters + ---------- + pars : array-like + The Sequence of parameters for a spherical baseline + pars[0] = scale + pars[1] = radius + r : array-like + The sequence or scalar over which pars is evaluated. + free : bool + The sequence of booleans which determines which derivatives are + needed. True for evaluation, False for no evaluation. + + Returns + ------- + array-like + The Jacobian of the nanospherical baseline. + """ + if len(pars) != self.npars: + emsg = "Argument pars must have " + str(self.npars) + " elements." + raise ValueError(emsg) + if len(free) != self.npars: + emsg = "Argument free must have " + str(self.npars) + " elements." + raise ValueError(emsg) + jacobian = [None for p in range(self.npars)] + if np.sum(np.logical_not(free)) == self.npars: + return jacobian + + if np.isscalar(r): + if r <= 0.0 or r >= 2.0 * pars[1]: + if free[0]: + jacobian[0] = 0.0 + if free[1]: + jacobian[1] = 0.0 + else: + if free[0]: + jacobian[0] = self._jacobianrawscale(pars, r) + if free[1]: + jacobian[1] = self._jacobianrawradius(pars, r) + else: + s = self._getdomain(pars, r) + if free[0]: + jacobian[0] = np.zeros(len(r)) + jacobian[0][s] = self._jacobianrawscale(pars, r[s]) + if free[1]: + jacobian[1] = np.zeros(len(r)) + jacobian[1][s] = self._jacobianrawradius(pars, r[s]) + return jacobian + + def _jacobianrawscale(self, pars, r): + """Return partial Jacobian wrt scale without bounds checking. + + Parameters + ---------- + pars : array-like + The sequence of parameters for a spherical baseline + pars[0] = scale + pars[1] = radius + r : array-like + The sequence or scalar over which pars is evaluated. + + Returns + ------- + array-like + The partial Jacobian of the nanoparticle baseline wrt scale without bounds checking. + """ + np.abs(pars[0]) + R = np.abs(pars[1]) + rdivR = r / R + # From abs'(s) in derivative, which is equivalent to sign(s) except at 0 where it + # is undefined. Since s=0 is equivalent to the absence of a nanoparticle, sign will + # be fine. + sign = np.sign(pars[1]) + return -sign * r * (1 - (3.0 / 4.0) * rdivR + (1.0 / 16.0) * rdivR**3) + + def _jacobianrawradius(self, pars, r): + """Return partial Jacobian wrt radius without bounds checking. + + Parameters + ---------- + pars : array-like + The Sequence of parameters for a spherical baseline + pars[0] = scale + pars[1] = radius + r : array-like + The sequence or scalar over which pars is evaluated. + + Returns + ------- + array-like + The partial Jacobian of the nanoparticle baseline wrt radius without bounds checking. 
+ """ + s = np.abs(pars[0]) + R = np.abs(pars[1]) + # From abs'(R) in derivative, which is equivalent to sign(R) except at 0 where it + # is undefined. Since R=0 is a singularity anyway, sign will be fine. + sign = np.sign(pars[1]) + return sign * s * (3 * r**2 * (r**2 - 4 * R**2)) / (16 * R**4) + + def _transform_parametersraw(self, pars, in_format, out_format): + """Convert parameter values from in_format to out_format. + + Parameters + ---------- + pars : array-like + The sequence of parameters + in_format : str + The format defined for this class + out_format : str + The format defined for this class + + Defined Formats + --------------- + internal - [scale, radius] + + Returns + ------- + array-like + The transformed parameter values with out_format. + """ + temp = np.array(pars) + + # Convert to intermediate format "internal" + if in_format == "internal": + # Map both scale and radius to their positive equivalents + temp[0] = np.abs(temp[0]) + temp[1] = np.abs(temp[1]) + else: + raise ValueError("Argument 'in_format' must be one of %s." % self.parformats) + + # Convert to specified output format from "internal" format. + if out_format == "internal": + pass + else: + raise ValueError("Argument 'out_format' must be one of %s." % self.parformats) + return temp + + def _valueraw(self, pars, r): + """Return value of spherical baseline for the given parameters and r values. + + Outside the interval [0, radius] the baseline is 0. + + Parameters + ---------- + pars : array-like + The sequence of parameters for a spherical baseline + pars[0] = scale + pars[1] = radius + r : array-like + The sequence or scalar over which pars is evaluated. + + Returns + ------- + float + The value of the spherical baseline. + """ + if len(pars) != self.npars: + emsg = "Argument pars must have " + str(self.npars) + " elements." + raise ValueError(emsg) + if np.isscalar(r): + if r <= 0.0 or r >= 2.0 * pars[1]: + return 0.0 + else: + return self._valueraw2(pars, r) + else: + out = np.zeros(len(r)) + s = self._getdomain(pars, r) + out[s] = self._valueraw2(pars, r[s]) + return out + + def _valueraw2(self, pars, r): + """Return value of spherical baseline without bounds checking for given parameters and r values. + + Parameters + ---------- + pars : array-like + The sequence of parameters for a spherical baseline + pars[0] = scale + pars[1] = radius + r : array-like + The sequence or scalar over which pars is evaluated. + + Returns + ------- + float + The value of spherical baseline without bounds checking for given parameters and r values + """ + s = np.abs(pars[0]) + R = np.abs(pars[1]) + rdivR = r / R + return -s * r * (1 - (3.0 / 4.0) * rdivR + (1.0 / 16.0) * rdivR**3) + + def _getdomain(self, pars, r): + """Return slice object for which r > 0 and r < twice the radius + + Parameters + ---------- + pars : array-like + The sequence of parameters for a spherical baseline + r : array-like + The sequence or scalar over which pars is evaluated. 
+
+ Returns
+ -------
+ slice object
+ The slice object for which r > 0 and r < twice the radius
+ """
+ low = r.searchsorted(0.0, side="right")
+ high = r.searchsorted(2.0 * pars[1], side="left")
+ return slice(low, high)
+
+ def getmodule(self):
+ return __name__
+
+
+# end of class NanoSpherical
+
+# simple test code
+if __name__ == "__main__":
+
+ f = NanoSpherical()
+ r = np.arange(-5, 10)
+ pars = np.array([-1.0, 7.0])
+ free = np.array([False, True])
+ print("Testing nanoparticle spherical baseline")
+ print("Scale: %f, Radius: %f" % (pars[0], pars[1]))
+ print("-----------------------------------------")
+ val = f._valueraw(pars, r)
+ jac = f._jacobianraw(pars, r, free)
+ outjac = [j if j is not None else [None] * len(r) for j in jac]
+ print(
+ "r".center(10),
+ "value".center(10),
+ "jac(scale)".center(10),
+ "jac(radius)".center(10),
+ )
+ for tup in zip(r, val, *outjac):
+ for t in tup:
+ if t is None:
+ print(f"{None}".ljust(10), end="")
+ else:
+ print(f"{t:.3g}".ljust(10), end="")
+ print()
diff --git a/src/diffpy/srmise/baselines/polynomial.py b/src/diffpy/srmise/baselines/polynomial.py
new file mode 100644
index 0000000..0f4f877
--- /dev/null
+++ b/src/diffpy/srmise/baselines/polynomial.py
@@ -0,0 +1,259 @@
+#!/usr/bin/env python
+##############################################################################
+#
+# SrMise by Luke Granlund
+# (c) 2014 trustees of the Michigan State University
+# (c) 2024 trustees of Columbia University in the City of New York
+# All rights reserved.
+#
+# File coded by: Luke Granlund
+#
+# See LICENSE.txt for license information.
+#
+##############################################################################
+
+import logging
+
+import numpy as np
+
+from diffpy.srmise.baselines.base import BaselineFunction
+from diffpy.srmise.srmiseerrors import SrMiseEstimationError
+
+logger = logging.getLogger("diffpy.srmise")
+
+
+class Polynomial(BaselineFunction):
+ """Methods for evaluation and parameter estimation of a polynomial baseline."""
+
+ def __init__(self, degree, Cache=None):
+ """Initialize a polynomial function of degree d.
+
+ Parameters
+ ----------
+ degree : int
+ The degree of the polynomial. Any negative value is interpreted
+ as the polynomial of negative infinite degree.
+ Cache : class
+ The class (not instance) which implements caching of BaseFunction
+ evaluations.
+ """
+ # Guarantee valid degree
+ try:
+ self.degree = int(str(degree))
+ except ValueError:
+ emsg = "Argument degree must be an integer."
+ raise ValueError(emsg)
+ if self.degree < 0:
+ self.degree = -1 # interpreted as negative infinity
+ # Define parameterdict
+ # e.g. {"a_0":3, "a_1":2, "a_2":1, "a_3":0} if degree is 3.
+ parameterdict = {}
+ for d in range(self.degree + 1):
+ parameterdict["a_" + str(d)] = self.degree - d
+ formats = ["internal"]
+ default_formats = {"default_input": "internal", "default_output": "internal"}
+ metadict = {"degree": (degree, repr)}
+ BaselineFunction.__init__(self, parameterdict, formats, default_formats, metadict, None, Cache)
+
+ # Methods required by BaselineFunction ####
+
+ def estimate_parameters(self, r, y):
+ """Estimate parameters for polynomial baseline.
+
+ Estimation is currently implemented only for degree < 2. This
+ very rudimentary method assumes the baseline crosses the origin, and
+ y=baseline+signal, where signal is primarily positive.
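
The parameterdict built in Polynomial.__init__ above places coefficient a_d at index degree - d, so pars[0] is the highest-order coefficient and pars[-1] is a_0, the same highest-first convention numpy.polyval expects. A small illustrative sketch (hypothetical standalone code)::

    import numpy as np

    degree = 3
    # {"a_0": 3, "a_1": 2, "a_2": 1, "a_3": 0}: coefficient a_d sits at index degree - d
    parameterdict = {"a_" + str(d): degree - d for d in range(degree + 1)}
    print(parameterdict)
    pars = [3, 0, 1, 2]           # pars[0] = a_3, ..., pars[3] = a_0
    # np.polyval expects the highest-order coefficient first: 3*r**3 + 0*r**2 + r + 2
    print(np.polyval(pars, 1.0))  # 6.0
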
+ + Parameters + ---------- + r : array-like + The data along r from which to estimate + y : array-like + The data along y from which to estimate + + Returns + ------- + array-like + The Numpy array of parameters in the default internal format. + Raises NotImplementedError if estimation is not implemented for this + degree, or SrMiseEstimationError if parameters cannot be estimated for + any other reason. + """ + if self.degree > 1: + emsg = "Polynomial implements estimation for baselines of degree <= 1 only." + raise NotImplementedError(emsg) + if len(r) != len(y): + emsg = "Arrays r, y must have equal length." + raise ValueError(emsg) + + if self.degree == -1: + return np.array([]) + + if self.degree == 0: + return np.array([0.0]) + + if self.degree == 1: + # Estimate degree=1 baseline. + # Find best slope for y=slope*r using only the least 10% of all + # points, assuming the non-baseline component of the data largely + # lies above the baseline. + # TODO: Make this more sophisticated. + try: + cut = np.max([len(y) / 10, 1]) + cut_idx = y.argsort()[: int(cut)] + + import numpy.linalg as la + + a = np.array([r[cut_idx]]).T + slope = la.lstsq(a, y[cut_idx])[0][0] + return np.array([slope, 0.0]) + except Exception as e: + emsg = "Error during estimation -- " + str(e) + raise SrMiseEstimationError(emsg) + + def _jacobianraw(self, pars, r, free): + """Return the Jacobian of a polynomial. + + Parameters + ---------- + pars : array-like + The sequence of parameters for a polynomial of degree d + pars[0] = a_degree + pars[1] = a_(degree-1) + ... + pars[d] = a_0 + r : array-like + The sequence or scalar over which pars is evaluated + free : bool + The sequence of booleans which determines which derivatives are + needed. True for evaluation, False for no evaluation. + + Returns + ------- + jacobian: array-like + The Jacobian of polynomial with degree d + """ + if len(pars) != self.npars: + emsg = "Argument pars must have " + str(self.npars) + " elements." + raise ValueError(emsg) + if len(free) != self.npars: + emsg = "Argument free must have " + str(self.npars) + " elements." + raise ValueError(emsg) + jacobian = [None for p in range(self.npars)] + if np.sum(np.logical_not(free)) == self.npars: + return jacobian + + # The partial derivative with respect to the nth coefficient of a + # polynomial is just x^nth. + for idx in range(self.npars): + if free[idx]: + jacobian[idx] = np.power(r, idx) + return jacobian + + def _transform_parametersraw(self, pars, in_format, out_format): + """Convert parameter values from in_format to out_format. + + Parameters + pars : array-like + The sequence of parameters + in_format : str + The format defined for this class + out_format : str + The format defined for this class + + Defined Formats + --------------- + internal: [a_degree, a_(degree-1), ..., a_0] + + Returns + ------- + array-like + The transformed parameters in out_format + """ + temp = np.array(pars) + + # Convert to intermediate format "internal" + if in_format == "internal": + pass + else: + raise ValueError("Argument 'in_format' must be one of %s." % self.parformats) + + # Convert to specified output format from "internal" format. + if out_format == "internal": + pass + else: + raise ValueError("Argument 'out_format' must be one of %s." % self.parformats) + return temp + + def _valueraw(self, pars, r): + """Return value of polynomial for the given parameters and r values. 
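
The degree-1 branch of estimate_parameters above keeps only the lowest roughly 10% of y values, assuming those points are dominated by the baseline rather than the peaks, and then least-squares fits y = slope*r through the origin. A standalone sketch of the same idea on synthetic data (hypothetical names)::

    import numpy as np

    rng = np.random.default_rng(0)
    r = np.arange(0.1, 10.0, 0.1)
    y = -r + 10 * np.exp(-((r - 5) ** 2)) + rng.random(len(r))  # slope -1 plus one peak

    cut = max(len(y) // 10, 1)
    cut_idx = y.argsort()[:cut]    # indices of the smallest y values
    a = r[cut_idx].reshape(-1, 1)  # design matrix for y = slope * r
    slope = np.linalg.lstsq(a, y[cut_idx], rcond=None)[0][0]
    print(slope)                   # roughly -1 on this synthetic data
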
+
+ Parameters
+ ----------
+ pars : array-like
+ The sequence of parameters for a polynomial of degree d
+ pars[0] = a_degree
+ pars[1] = a_(degree-1)
+ ...
+ pars[d] = a_0
+ If degree is negative infinity, pars is an empty sequence.
+ r : array-like
+ The sequence or scalar over which pars is evaluated
+
+ Returns
+ -------
+ float or array-like
+ The value of the polynomial for the given parameters and r values.
+ """
+ if len(pars) != self.npars:
+ emsg = "Argument pars must have " + str(self.npars) + " elements."
+ raise ValueError(emsg)
+ return np.polyval(pars, r)
+
+ def getmodule(self):
+ return __name__
+
+
+# end of class Polynomial
+
+# simple test code
+if __name__ == "__main__":
+
+ # Test polynomial of degree 3
+ print("Testing degree 3 polynomial")
+ print("---------------------------")
+ f = Polynomial(degree=3)
+ r = np.arange(5)
+ pars = np.array([3, 0, 1, 2])
+ free = np.array([True, False, True, True])
+ val = f._valueraw(pars, r)
+ jac = f._jacobianraw(pars, r, free)
+ print("Value:\n", val)
+ print("Jacobian: ")
+ for j in jac:
+ print(" %s" % j)
+
+ # Test polynomial of degree -oo
+ print("\nTesting degree -oo polynomial (== 0)")
+ print("------------------------------------")
+ f = Polynomial(degree=-1)
+ r = np.arange(5)
+ pars = np.array([])
+ free = np.array([])
+ val = f._valueraw(pars, r)
+ jac = f._jacobianraw(pars, r, free)
+ print("Value:\n", val)
+ print("Jacobian: ")
+ for j in jac:
+ print(" %s" % j)
+
+ # Test linear estimation
+ print("\nTesting linear baseline estimation")
+ print("------------------------------------")
+ f = Polynomial(degree=1)
+ pars = np.array([1, 0])
+ r = np.arange(0, 10, 0.1)
+ y = -r + 10 * np.exp(-((r - 5) ** 2)) + np.random.rand(len(r))
+ est = f.estimate_parameters(r, y)
+ print("Actual baseline: ", np.array([-1, 0.0]))
+ print("Estimated baseline: ", est)
diff --git a/diffpy/srmise/dataclusters.py b/src/diffpy/srmise/dataclusters.py
similarity index 55%
rename from diffpy/srmise/dataclusters.py
rename to src/diffpy/srmise/dataclusters.py
index 077c1a8..af0c4e5 100644
--- a/diffpy/srmise/dataclusters.py
+++ b/src/diffpy/srmise/dataclusters.py
@@ -2,7 +2,8 @@
 ##############################################################################
 #
 # SrMise by Luke Granlund
-# (c) 2014 trustees of the Michigan State University.
+# (c) 2014 trustees of the Michigan State University
+# (c) 2024 trustees of Columbia University in the City of New York
 # All rights reserved.
 #
 # File coded by: Luke Granlund
@@ -17,65 +18,104 @@
 import matplotlib.pyplot as plt
 import numpy as np
 
-import diffpy.srmise.srmiselog
-
 logger = logging.getLogger("diffpy.srmise")
 
+
 class DataClusters:
-    """Find clusters corresponding to peaks in numerical x-, y-value arrays.
+ """Find clusters corresponding to peaks in the PDF (y-array)
 
-    DataClusters determines which points, given a pair of x- and y-value
-    sequences, roughly correspond to which visible peaks in that data. This
-    division is contiguous, with borders between clusters near relative
+ DataClusters determines which points in interatomic distance, r,
+ correspond to peaks in the PDF. The division between clusters
+ is contiguous, with borders between clusters typically near relative
 minima in the data.
 
 Clusters are iteratively formed around points with the largest
-    y-coordinates. New clusters are added only when the unclustered data
+ PDF values. New clusters are added only when the unclustered data
 point under consideration is greater than a given distance (the
 'resolution') from the nearest existing cluster.
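
A toy illustration of that clustering strategy, not the srmise implementation (which tracks contiguous index ranges rather than labels): points are visited in order of decreasing y, and each either joins the nearest existing cluster or, when it lies farther than the resolution from every cluster, seeds a new one. Here the resolution is treated as an x-distance for simplicity::

    import numpy as np

    def greedy_clusters(x, y, res):
        order = np.argsort(y)[::-1]     # visit points with largest y first
        centers = []                    # x positions that seeded clusters
        labels = np.empty(len(x), dtype=int)
        for idx in order:
            d = [abs(c - x[idx]) for c in centers]
            if not centers or min(d) > res:
                centers.append(x[idx])  # too far from every cluster: seed a new one
                labels[idx] = len(centers) - 1
            else:
                labels[idx] = int(np.argmin(d))
        return labels

    x = np.linspace(-2.0, 5.0, 15)
    y = np.exp(-(x**2)) + np.exp(-((x - 3.0) ** 2))  # two peaks near x=0 and x=3
    print(greedy_clusters(x, y, res=2.0))
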
Data members
-    x - sequence of x coordinates.
-    y - sequence of y values
-    res - clustering 'resolution'
-    data_order - array of x, y indices ordered by decreasing y
-    clusters - array of cluster ranges
-    current_idx - index of data_order currently considered
+ ------------
+ x : array
+ The array of r values.
+ y : array
+ The array of PDF values, G(r)
+ res : int
+ The clustering resolution, i.e., the number of points another point has to
+ be away from the center of an existing cluster before a new cluster is
+ formed. A value of zero allows every point to be a cluster.
+ data_order : array
+ The array of x, y indices ordered by decreasing y
+ clusters : array
+ The array of cluster ranges
+ current_idx : int
+ The index of data_order currently considered
 """
 
 def __init__(self, x, y, res):
-        """Initializes the data to be clustered, and the 'resolution' to use.
+ """Constructor
 
 Parameters
-        x - numeric sequence of x-value sorted in ascending order
-        y - corresponding sequence of y-values
-        res - clustering 'resolution'
+ ----------
+ x : array
+ The array of r values.
+ y : array
+ The array of PDF values, G(r)
+ res : int
+ The clustering resolution, i.e., the number of points another point has to
+ be away from the center of an existing cluster before a new cluster is
+ formed. A value of zero allows every point to be a cluster.
 """
-        #Track internal state of clustering.
+ # Track internal state of clustering.
 self.INIT = 0
 self.READY = 1
 self.CLUSTERING = 2
 self.DONE = 3
-
-        self.clear()
-        self.setdata(x, y, res)
+ self._clear()
+ self._setdata(x, y, res)
 return
-
 # This iterator operates not over found clusters, but over the process of
 # clustering. This behavior could cause confusion and should perhaps be
 # altered.
 def __iter__(self):
 return self
 
+ def __eq__(self, other):
+ if not isinstance(other, DataClusters):
+ return False
+ return (
+ np.array_equal(self.x, other.x)
+ and np.array_equal(self.y, other.y)
+ and np.array_equal(self.data_order, other.data_order)
+ and np.array_equal(self.clusters, other.clusters)
+ and self.res == other.res
+ and self.current_idx == other.current_idx
+ and self.lastcluster_idx == other.lastcluster_idx
+ and self.lastpoint_idx == other.lastpoint_idx
+ and self.status == other.status
+ and self.INIT == other.INIT
+ and self.READY == other.READY
+ and self.CLUSTERING == other.CLUSTERING
+ and self.DONE == other.DONE
+ )
+
+ def _clear(self):
+ """
+ Clear all data and reset the cluster object to a transient initial state.
+
+ The purpose of this method is to provide a clean state before creating new clustering operations.
+ The object is updated in-place and no new instance is returned.
-    def clear(self):
-        """Clear all members, including user data."""
+ Returns
+ -------
+ None
+ """
 self.x = np.array([])
 self.y = np.array([])
-        self.data_order = np.array([], dtype=np.int32)
-        self.clusters = np.array([[]], dtype=np.int32)
+ self.data_order = np.array([])
+ self.clusters = np.array([[]])
 self.res = 0
 self.current_idx = 0
 self.lastcluster_idx = None
@@ -85,7 +125,7 @@ def clear(self):
 
 def reset_clusters(self):
 """Reset all progress on clustering."""
-        self.clusters = np.array([[self.data_order[-1],self.data_order[-1]]])
+ self.clusters = np.array([[self.data_order[-1], self.data_order[-1]]])
 self.current_idx = self.data_order.size - 1
 self.lastcluster_idx = 0
 self.lastpoint_idx = self.data_order[-1]
@@ -94,38 +134,45 @@ def reset_clusters(self):
 self.status = self.READY
 return
 
-    def setdata(self, x, y, res):
+ def _setdata(self, x, y, res):
 """Assign data members for x- and y-coordinates, and resolution.
 
 Parameters
-        x - numeric sequence of x-value sorted in ascending order
-        y - corresponding sequence of y-values
-        res - clustering 'resolution'
+ ----------
+ x : array
+ The array of r values.
+ y : array
+ The array of PDF values, G(r)
+ res : int
+ The clustering resolution, i.e., the number of points another point has to
+ be away from the center of an existing cluster before a new cluster is
+ formed. A value of zero allows every point to be a cluster.
 """
-        #Test for error conditions
-        # 1) Length mismatch
-        # 2) Bound errors for res
-        # 3) r isn't sorted?
 if len(x) != len(y):
 raise ValueError("Sequences x and y must have the same length.")
-        if res <= 0:
-            raise ValueError("Resolution res must be greater than 0.")
-        # Test for sorting?
-
+ if res < 0:
+ raise ValueError(
+ "Value of resolution parameter is less than zero. Please rerun specifying a non-negative res"
+ )
 self.x = x
 self.y = y
 self.res = res
-
-        self.data_order = self.y.argsort()  # Defines order of clustering
-        self.clusters = np.array([[self.data_order[-1], self.data_order[-1]]])
-        self.current_idx = len(self.data_order) - 1
-        self.lastcluster_idx = 0
-        self.lastpoint_idx = self.data_order[-1]
-
-        self.status = self.READY
+ if x.size == 0:
+ self.data_order = np.array([])
+ self.clusters = np.array([[]])
+ self.current_idx = 0
+ self.lastpoint_idx = None
+ self.status = self.INIT
+ else:
+ self.data_order = self.y.argsort()
+ self.clusters = np.array([[self.data_order[-1], self.data_order[-1]]])
+ self.current_idx = len(self.data_order) - 1
+ self.lastpoint_idx = self.data_order[-1]
+ self.status = self.READY
+ self.lastcluster_idx = None
 return
 
-    def next(self):
+ def __next__(self):
 """Cluster point with largest y-coordinate left, returning self.
next() always adds at least one additional point to the existing
@@ -169,9 +216,8 @@ def next(self):
 self.lastcluster_idx = nearest_cluster[0]
 else:
 # insert right of nearest cluster
-            self.lastcluster_idx = nearest_cluster[0]+1
-            self.clusters = np.insert(self.clusters, self.lastcluster_idx,
-                [test_idx, test_idx], 0)
+ self.lastcluster_idx = nearest_cluster[0] + 1
+ self.clusters = np.insert(self.clusters, int(self.lastcluster_idx), [test_idx, test_idx], 0)
 return self
 
 def makeclusters(self):
@@ -200,10 +246,10 @@ def find_nearest_cluster2(self, x):
 return self.find_nearest_cluster(idx)
 else:
 # Choose adjacent index nearest to x
-            if (self.x[idx] - x) < (x - self.x[idx-1]):
+ if (self.x[idx] - x) < (x - self.x[idx - 1]):
 return self.find_nearest_cluster(idx)
 else:
-                return self.find_nearest_cluster(idx-1)
+ return self.find_nearest_cluster(idx - 1)
 
 def find_nearest_cluster(self, idx):
 """Return [cluster index, distance] for cluster nearest to x[idx].
@@ -225,23 +271,27 @@ def find_nearest_cluster(self, idx):
 return None
 
 flat_idx = clusters_flat.searchsorted(idx)
-        near_idx = flat_idx/2
+ near_idx = flat_idx // 2
 
 if flat_idx == len(clusters_flat):
-            #test_idx is right of the last cluster
-            return [near_idx-1, self.x[idx]-self.x[self.clusters[-1, 1]]]
-        if clusters_flat[flat_idx] == idx or flat_idx%2 == 1:
+ # test_idx is right of the last cluster
+ return [near_idx - 1, self.x[idx] - self.x[self.clusters[-1, 1]]]
+ if clusters_flat[flat_idx] == idx or flat_idx % 2 == 1:
 # idx is within some cluster
 return [near_idx, 0.0]
 if flat_idx == 0:
 # idx is left of the first cluster
-            return [near_idx, self.x[idx]-self.x[self.clusters[0,0]]]
+ return [near_idx, self.x[idx] - self.x[self.clusters[0, 0]]]
 
 # Calculate which of the two nearest clusters is closer
-        distances=np.array([self.x[idx]-self.x[self.clusters[near_idx-1, 1]],
-            self.x[idx]-self.x[self.clusters[near_idx, 0]]])
+ distances = np.array(
+ [
+ self.x[idx] - self.x[self.clusters[int(near_idx) - 1, 1]],
+ self.x[idx] - self.x[self.clusters[int(near_idx), 0]],
+ ]
+ )
 if distances[0] < np.abs(distances[1]):
-            return [near_idx-1, distances[0]]
+ return [near_idx - 1, distances[0]]
 else:
 return [near_idx, distances[1]]
@@ -255,15 +305,14 @@ def cluster_is_full(self, cluster_idx):
 cluster_idx - The index of the cluster to test
 """
 if cluster_idx > 0:
-            low = self.clusters[cluster_idx-1, 1] + 1
+ low = self.clusters[cluster_idx - 1, 1] + 1
 else:
 low = 0
 if cluster_idx < len(self.clusters) - 1:
-            high = self.clusters[cluster_idx+1, 0] - 1
+ high = self.clusters[cluster_idx + 1, 0] - 1
 else:
 high = len(self.data_order) - 1
-        return self.clusters[cluster_idx, 0] == low \
-            and self.clusters[cluster_idx, 1] == high
+ return self.clusters[cluster_idx, 0] == low and self.clusters[cluster_idx, 1] == high
 
 def combine_clusters(self, combine):
 """Combine clusters specified by each subarray of cluster indices.
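
find_nearest_cluster above exploits the fact that the flattened [left, right] endpoint pairs form a sorted sequence, so the parity of np.searchsorted's insertion point tells whether a data index falls inside a cluster or in a gap between clusters. A small sketch of that lookup with hypothetical data::

    import numpy as np

    clusters = np.array([[0, 3], [6, 9], [12, 14]])  # rows of [left, right] data indices
    flat = clusters.ravel()                          # sorted endpoints [0, 3, 6, 9, 12, 14]
    for idx in (2, 4, 13):
        pos = int(flat.searchsorted(idx))
        inside = pos % 2 == 1 or (pos < len(flat) and flat[pos] == idx)
        print(idx, "inside" if inside else "between", "-> cluster near", pos // 2)
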
@@ -283,15 +332,35 @@ def combine_clusters(self, combine): # Test that all clusters are contiguous and adjacent first = c[0] for i in range(c[0], c[-1]): - if c[i+1-first]-1 != c[i-first]: - raise ValueError(''.join(["Clusters ", str(c[i]), " and ", str(c[i+1]), " are not contiguous and/or increasing."])) - if self.clusters[i+1, 0]-self.clusters[i, 1] != 1: - raise ValueError(''.join(["Clusters ", str(c[i]), " and ", str(c[i+1]), " have unclustered points between them."])) - - #update cluster endpoints + if c[i + 1 - first] - 1 != c[i - first]: + raise ValueError( + "".join( + [ + "Clusters ", + str(c[i]), + " and ", + str(c[i + 1]), + " are not contiguous and/or increasing.", + ] + ) + ) + if self.clusters[i + 1, 0] - self.clusters[i, 1] != 1: + raise ValueError( + "".join( + [ + "Clusters ", + str(c[i]), + " and ", + str(c[i + 1]), + " have unclustered points between them.", + ] + ) + ) + + # update cluster endpoints self.clusters[c[0], 1] = self.clusters[c[-1], 1] todelete = np.array([c[1:] for c in combine]).ravel() - self.clusters = np.delete(self.clusters, todelete ,0) + self.clusters = np.delete(self.clusters, todelete, 0) def find_adjacent_clusters(self): """Return all cluster indices with no unclustered points between them. @@ -306,20 +375,26 @@ def find_adjacent_clusters(self): adj = [] left_idx = 0 - while left_idx < len(self.clusters)-1: - while left_idx < len(self.clusters)-1 and self.clusters[left_idx+1, 0] - self.clusters[left_idx, 1] !=1: + while left_idx < len(self.clusters) - 1: + while ( + left_idx < len(self.clusters) - 1 + and self.clusters[left_idx + 1, 0] - self.clusters[left_idx, 1] != 1 + ): left_idx += 1 # Not left_idx+1 since left_idx=len(self.clusters)-2 even if no # clusters are actually adjacent. right_idx = left_idx - while right_idx < len(self.clusters)-1 and self.clusters[right_idx+1, 0] - self.clusters[right_idx, 1] == 1: + while ( + right_idx < len(self.clusters) - 1 + and self.clusters[right_idx + 1, 0] - self.clusters[right_idx, 1] == 1 + ): right_idx += 1 if right_idx > left_idx: - adj.append(range(left_idx, right_idx+1)) - left_idx = right_idx+1 # set for next possible left_idx + adj.append(range(left_idx, right_idx + 1)) + left_idx = right_idx + 1 # set for next possible left_idx return np.array(adj) def cut(self, idx): @@ -331,24 +406,23 @@ def cut(self, idx): data_ids = self.clusters[idx] if len(data_ids) == data_ids.size: # idx is a scalar, so give single slice object - return slice(data_ids[0], data_ids[1]+1) + return slice(data_ids[0], data_ids[1] + 1) else: # idx is a list/slice, so give list of slice objects - return [slice(c[0], c[1]+1) for c in data_ids] + return [slice(c[0], c[1] + 1) for c in data_ids] def cluster_boundaries(self): """Return sequence with (x,y) of all cluster boundaries.""" boundaries = [] - for l in self.clusters: - xlo = np.mean(self.x[l[0]-1:l[0]+1]) - ylo = np.mean(self.y[l[0]-1:l[0]+1]) - xhi = np.mean(self.x[l[1]:l[1]+2]) - yhi = np.mean(self.y[l[1]:l[1]+2]) + for cluster in self.clusters: + xlo = np.mean(self.x[cluster[0] - 1 : cluster[0] + 1]) + ylo = np.mean(self.y[cluster[0] - 1 : cluster[0] + 1]) + xhi = np.mean(self.x[cluster[1] : cluster[1] + 2]) + yhi = np.mean(self.y[cluster[1] : cluster[1] + 2]) boundaries.append((xlo, ylo)) boundaries.append((xhi, yhi)) return np.unique(boundaries) - def plot(self, *args, **kwds): """Plot the data with vertical lines at the cluster divisions. 
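cluster_boundaries above places each division halfway between the data point at a cluster edge and its nearest neighbour outside the cluster; plot() then draws a vertical line at each such point. The midpoint arithmetic reduces to (hypothetical values)::

    import numpy as np

    x = np.linspace(0.0, 7.0, 8)  # evenly spaced r values 0, 1, ..., 7
    cluster = (2, 4)              # indices of the first and last point in one cluster
    xlo = np.mean(x[cluster[0] - 1 : cluster[0] + 1])  # midpoint of x[1] and x[2]
    xhi = np.mean(x[cluster[1] : cluster[1] + 2])      # midpoint of x[4] and x[5]
    print(xlo, xhi)               # 1.5 4.5
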
@@ -362,7 +436,7 @@ def plot(self, *args, **kwds): boundaries = self.cluster_boundaries() (ymin, ymax) = ax.get_ylim() for b in boundaries: - plt.axvline(b[0], 0, (b[1]-ymin)/(ymax-ymin), color='k') + plt.axvline(b[0], 0, (b[1] - ymin) / (ymax - ymin), color="k") plt.ion() ax.figure.canvas.draw() return @@ -376,20 +450,24 @@ def animate(self): status = self.status self.reset_clusters() + fig, ax = plt.subplots() + canvas = fig.canvas + background = canvas.copy_from_bbox(ax.bbox) + ymin, ymax = ax.get_ylim() all_lines = [] for i in self: canvas.restore_region(background) boundaries = self.cluster_boundaries() for i, b in enumerate(boundaries): - height = (b[1]-ymin)/(ymax-ymin) + height = (b[1] - ymin) / (ymax - ymin) if i < len(all_lines): all_lines[i].set_xdata([b[0], b[0]]) all_lines[i].set_ydata([0, height]) ax.draw_artist(all_lines[i]) else: - l = plt.axvline(b[0], 0, height, color='k', animated=True) - ax.draw_artist(l) - all_lines.append(l) + line = plt.axvline(b[0], 0, height, color="k", animated=True) + ax.draw_artist(line) + all_lines.append(line) canvas.blit(ax.bbox) self.clusters = clusters @@ -399,21 +477,40 @@ def animate(self): self.status = status return -#End of class DataClusters +# End of class DataClusters -# simple test code -if __name__ == '__main__': - x = np.array([-2., -1.5, -1., -0.5, 0., 0.5, 1., 1.5, 2., 2.5, 3., 3.5, 4., 4.5, 5.]) - y = np.array([0.0183156, 0.105399, 0.36788, 0.778806, 1.00012, 0.780731, 0.386195, 0.210798, 0.386195, 0.780731, 1.00012, 0.778806, 0.36788, 0.105399, 0.0183156]) - - testcluster = DataClusters(x, y, .1) +# simple test code +if __name__ == "__main__": + + x = np.array([-2.0, -1.5, -1.0, -0.5, 0.0, 0.5, 1.0, 1.5, 2.0, 2.5, 3.0, 3.5, 4.0, 4.5, 5.0]) + y = np.array( + [ + 0.0183156, + 0.105399, + 0.36788, + 0.778806, + 1.00012, + 0.780731, + 0.386195, + 0.210798, + 0.386195, + 0.780731, + 1.00012, + 0.778806, + 0.36788, + 0.105399, + 0.0183156, + ] + ) + + testcluster = DataClusters(x, y, 0.1) testcluster.makeclusters() - print testcluster.clusters + print(testcluster.clusters) adj = testcluster.find_adjacent_clusters() - print adj - if len(adj) >0: + print(adj) + if len(adj) > 0: testcluster.combine_clusters(adj) - print testcluster.clusters + print(testcluster.clusters) diff --git a/diffpy/srmise/modelcluster.py b/src/diffpy/srmise/modelcluster.py similarity index 66% rename from diffpy/srmise/modelcluster.py rename to src/diffpy/srmise/modelcluster.py index 10c0677..636e7e8 100644 --- a/diffpy/srmise/modelcluster.py +++ b/src/diffpy/srmise/modelcluster.py @@ -2,7 +2,8 @@ ############################################################################## # # SrMise by Luke Granlund -# (c) 2014 trustees of the Michigan State University. +# (c) 2014 trustees of the Michigan State University +# (c) 2024 trustees of Columbia University in the City of New York # All rights reserved. 
#
# File coded by: Luke Granlund
@@ -22,14 +23,12 @@
 import re
 import sys
 
-import matplotlib.pyplot as plt
 import numpy as np
-import scipy as sp
-from scipy.optimize import leastsq
 
-from diffpy.srmise.baselines import Baseline
+from diffpy.srmise import srmiselog
+from diffpy.srmise.baselines.base import Baseline
 from diffpy.srmise.modelparts import ModelParts
-from diffpy.srmise.peaks import Peak, Peaks
+from diffpy.srmise.peaks.base import Peak, Peaks
 from diffpy.srmise.srmiseerrors import (
 SrMiseDataFormatError,
 SrMiseEstimationError,
@@ -39,8 +38,6 @@
 
 logger = logging.getLogger("diffpy.srmise")
 
-from diffpy.srmise import srmiselog
-
 
class ModelCovariance(object):
 """Helper class preserves uncertainty info (full covariance matrix) for a fit model.
@@ -57,8 +54,8 @@ class ModelCovariance(object):
 
 def __init__(self, *args, **kwds):
 """Intialize object."""
-        self.cov = None # The raw covariance matrix
-        self.model = None # ModelParts instance, so both peaks and baseline (if present)
+ self.cov = None  # The raw covariance matrix
+ self.model = None  # ModelParts instance, so both peaks and baseline (if present)
 
 # Map i->[n1,n2,...] of the jth ModelPart to the n_i parameters in cov.
 self.mmap = {}
@@ -85,10 +82,12 @@ def setcovariance(self, model, cov):
 
 Parameters
 ----------
-        model - A ModelParts object
-        cov - The nxn covariance matrix for n model parameters. If the parameterization includes "fixed"
-            parameters not included in the covariance matrix, the matrix is expanded to include these
-            parameters with 0 uncertainty.
+ model : ModelParts
+ The ModelParts instance
+ cov : ndarray
+ The nxn covariance matrix for n model parameters. If the parameterization includes "fixed"
+ parameters not included in the covariance matrix, the matrix is expanded to include these
+ parameters with 0 uncertainty.
 """
 tempcov = np.array(cov)
 
@@ -96,11 +95,12 @@ def setcovariance(self, model, cov):
 emsg = "Parameter 'cov' must be a square matrix."
 raise ValueError(emsg)
 
-        if tempcov.shape[0] != model.npars(True) and tempcov.shape[0] != model.npars(False):
-            emsg = ["Parameter 'cov' must be an nxn matrix, where n is equal to the number of free ",
-                "parameters in the model, or the total number of parameters (fixed and free) of ",
-                "the model."]
+ if tempcov.shape[0] != model.npars(True) and tempcov.shape[0] != model.npars(False):
+ emsg = [
+ "Parameter 'cov' must be an nxn matrix, where n is equal to the number of free ",
+ "parameters in the model, or the total number of parameters (fixed and free) of ",
+ "the model.",
+ ]
 raise ValueError("".join(emsg))
 
 self.model = model.copy()
@@ -113,8 +113,8 @@ def setcovariance(self, model, cov):
 for i, m in enumerate(model):
 self.mmap[i] = n + np.arange(m.npars(True))
 for j, p in enumerate(m):
-                self.pmap[(i,j)] = n
-                self.ipmap[n] = (i,j)
+ self.pmap[(i, j)] = n
+ self.ipmap[n] = (i, j)
 n += 1
 
 if n == tempcov.shape[0]:
@@ -122,21 +122,20 @@ def setcovariance(self, model, cov):
 self.cov = tempcov
 else:
 # Create new covariance matrix, making sure to account for fixed pars
-            self.cov = np.matrix(np.zeros((n,n)))
+ self.cov = np.matrix(np.zeros((n, n)))
 
-            i=0
-            rawi=0
+ i = 0
+ rawi = 0
 for i in range(n):
 j = 0
 rawj = 0
 if free[i]:
 for j in range(n):
 if free[j]:
-                            self.cov[i,j] = cov[rawi,rawj]
+ self.cov[i, j] = cov[rawi, rawj]
 rawj += 1
 rawi += 1
 
-
 def transform(self, in_format, out_format, **kwds):
 """Transform parameters and covariance matrix under specified change of variables.
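
setcovariance above embeds an n_free x n_free least-squares covariance matrix into the full parameter space, leaving zero rows and columns (zero uncertainty) for fixed parameters. A compact sketch of the same embedding, using np.ix_ in place of the explicit double loop::

    import numpy as np

    free = np.array([True, False, True])  # the middle parameter is fixed
    cov_free = np.array([[0.04, 0.01],
                         [0.01, 0.09]])   # covariance of the two free parameters
    n = len(free)
    cov = np.zeros((n, n))
    ix = np.flatnonzero(free)
    cov[np.ix_(ix, ix)] = cov_free        # fixed rows/columns remain exactly zero
    print(cov)
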
@@ -154,8 +153,10 @@ def transform(self, in_format, out_format, **kwds): Parameters ---------- - in_format - The current format of parameters - out_format - The new format for parameters + in_format : str + The current format of parameters + out_format : str + The new format for parameters Keywords -------- @@ -168,7 +169,7 @@ def transform(self, in_format, out_format, **kwds): if "parts" in kwds: if kwds["parts"] == "peaks": - parts = range(len(self.model)-1) + parts = range(len(self.model) - 1) elif kwds["parts"] == "baseline": parts = [-1] else: @@ -189,33 +190,41 @@ def transform(self, in_format, out_format, **kwds): for i in parts: start = self.mmap[i][0] - stop = self.mmap[i][-1]+1 + stop = self.mmap[i][-1] + 1 p = self.model[i] try: subg = p.owner().transform_derivatives(p.pars, in_format, out_format) except NotImplementedError: - logger.warning("Transformation gradient not implemented for part %i: %s. Ignoring transformation." %(i, str(p))) + logger.warning( + "Transformation gradient not implemented for part %i: %s. Ignoring transformation." + % (i, str(p)) + ) subg = np.identity(p.npars(True)) except Exception as e: - logger.warning("Transformation gradient failed for part %i: %s. Failed with message %s. Ignoring transformation." %(i, str(p), str(e))) + logger.warning( + "Transformation gradient failed for part %i: %s. " + "Failed with message %s. Ignoring transformation." % (i, str(p), str(e)) + ) subg = np.identity(p.npars(True)) # Now transform the parameters to match try: p.pars = p.owner().transform_parameters(p.pars, in_format, out_format) except Exception as e: - logger.warning("Parameter transformation failed for part %i: %s. Failed with message %s. Ignoring transformation." %(i, str(p), str(e))) + logger.warning( + "Parameter transformation failed for part %i: %s. " + "Failed with message %s. Ignoring transformation." % (i, str(p), str(e)) + ) subg = np.identity(p.npars(True)) # Update the global gradient matrix g[start:stop, start:stop] = subg g = np.matrix(g) - self.cov = np.array(g*np.matrix(self.cov).transpose()*g) + self.cov = np.array(g * np.matrix(self.cov).transpose() * g) return - def getcorrelation(self, i, j): """Return the correlation between variables i and j, Corr_ij=Cov_ij/(sigma_i sigma_j) @@ -224,6 +233,18 @@ def getcorrelation(self, i, j): The standard deviation of fixed parameters is 0, in which case the correlation is undefined, but return 0 for simplicity. + + Parameters + ---------- + i : int + The index of variable in peak mapping + j : int + The index of variable in peak mapping + + Returns + ------- + float + The correlation between variables i and j """ if self.cov is None: emsg = "Cannot get correlation on undefined covariance matrix." @@ -233,10 +254,10 @@ def getcorrelation(self, i, j): i1 = self.pmap[i] if i in self.pmap else i j1 = self.pmap[j] if j in self.pmap else j - if self.cov[i1,i1] == 0. or self.cov[j1,j1] == 0.: - return 0. # Avoiding undefined quantities is sensible in this context. + if self.cov[i1, i1] == 0.0 or self.cov[j1, j1] == 0.0: + return 0.0 # Avoiding undefined quantities is sensible in this context. else: - return self.cov[i1,j1]/(np.sqrt(self.cov[i1,i1])*np.sqrt(self.cov[j1,j1])) + return self.cov[i1, j1] / (np.sqrt(self.cov[i1, i1]) * np.sqrt(self.cov[j1, j1])) def getvalue(self, i): """Return value of parameter i. @@ -252,15 +273,37 @@ def getuncertainty(self, i): The variable may be specified as an integer, or as a two-component tuple of integers (l, m) which indicate the mth parameter of modelpart l. 
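
getcorrelation above is the usual normalization Corr_ij = Cov_ij / (sigma_i * sigma_j), with the convention that any pairing involving a fixed parameter (zero variance) has correlation 0. A standalone sketch::

    import numpy as np

    def correlation(cov, i, j):
        # Corr_ij = Cov_ij / (sigma_i * sigma_j); 0 by convention for fixed parameters
        if cov[i, i] == 0.0 or cov[j, j] == 0.0:
            return 0.0
        return cov[i, j] / (np.sqrt(cov[i, i]) * np.sqrt(cov[j, j]))

    cov = np.array([[0.04, 0.012, 0.0],
                    [0.012, 0.09, 0.0],
                    [0.0, 0.0, 0.0]])  # the third parameter is fixed
    print(correlation(cov, 0, 1))      # 0.012 / (0.2 * 0.3) = 0.2
    print(correlation(cov, 0, 2))      # 0.0
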
+
+ Parameters
+ ----------
+ i : int
+ The index of variable in peak mapping
+
+ Returns
+ -------
+ float
+ The uncertainty of variable at index i.
+ """
 (l, m) = i if i in self.pmap else self.ipmap[i]
-        return np.sqrt(self.getcovariance(i,i))
+ return np.sqrt(self.getcovariance(i, i))
 
 def getcovariance(self, i, j):
 """Return the covariance between variables i and j.
 
 The variables may be specified as integers, or as a two-component tuple
 of integers (l, m) which indicate the mth parameter of modelpart l.
+
+ Parameters
+ ----------
+ i : int
+ The index of variable in peak mapping
+ j : int
+ The index of variable in peak mapping
+
+ Returns
+ -------
+ float
+ The covariance between variables at indices i and j.
+ """
 if self.cov is None:
 emsg = "Cannot get correlation on undefined covariance matrix."
@@ -270,13 +313,23 @@ def getcovariance(self, i, j):
 i1 = self.pmap[i] if i in self.pmap else i
 j1 = self.pmap[j] if j in self.pmap else j
 
-        return self.cov[i1,j1]
+ return self.cov[i1, j1]
 
 def get(self, i):
 """Return (value, uncertainty) tuple for parameter i.
 
 The variable may be specified as an integer, or as a two-component tuple
 of integers (l, m) which indicate the mth parameter of modelpart l.
+
+ Parameters
+ ----------
+ i : int
+ The index of variable in peak mapping
+
+ Returns
+ -------
+ (float, float)
+ The value and uncertainty of variable at index i.
 """
 return (self.getvalue(i), self.getuncertainty(i))
 
@@ -289,8 +342,13 @@ def correlationwarning(self, threshold=0.8):
 
 Parameters
 ----------
-        threshold - A real number between 0 and 1.
+ threshold : float
+ A real number between 0 and 1.
 
+ Returns
+ -------
+ list of (i, j, c) tuples
+ The pairs of parameter indices and their correlation c.
 """
 if self.cov is None:
 emsg = "Cannot calculate correlation on undefined covariance matrix."
@@ -298,9 +356,9 @@ def correlationwarning(self, threshold=0.8):
 correlated = []
 for i in range(self.cov.shape[0]):
-            for j in range(i+1, self.cov.shape[0]):
-                c = self.getcorrelation(i,j)
-                if c and np.abs(c) > threshold: # filter out None values
+ for j in range(i + 1, self.cov.shape[0]):
+ c = self.getcorrelation(i, j)
+ if c and np.abs(c) > threshold:  # filter out None values
 correlated.append((self.ipmap[i], self.ipmap[j], c))
 
 return correlated
@@ -310,7 +368,7 @@ def __str__(self):
 return "Model and/or Covariance matrix undefined."
 lines = []
 for i, m in enumerate(self.model):
-            lines.append(" ".join([self.prettypar((i,j)) for j in range(len(m))]))
+ lines.append(" ".join([self.prettypar((i, j)) for j in range(len(m))]))
 return "\n".join(lines)
 
 def prettypar(self, i):
@@ -318,15 +376,26 @@ def prettypar(self, i):
 
 The variable may be specified as an integer, or as a two-component tuple
 of integers (l, m) which indicate the mth parameter of modelpart l.
+
+ Parameters
+ ----------
+ i : int
+ The index of variable in peak mapping
+
+ Returns
+ -------
+ str
+ 'value (uncertainty)' for variable at index i.
 """
 if self.model is None or self.cov is None:
 return "Model and/or Covariance matrix undefined."
 k = i if i in self.ipmap else self.pmap[i]
-        return "%.5e (%.5e)" %(self.getvalue(k), np.sqrt(self.getcovariance(k,k)))
+ return "%.5e (%.5e)" % (self.getvalue(k), np.sqrt(self.getcovariance(k, k)))
 
 
# End of class ModelCovariance
 
+
class ModelCluster(object):
 """Associate a contiguous cluster of data with an appropriate model.
@@ -363,18 +432,26 @@ class ModelCluster(object):
 
 def __init__(self, model, *args, **kwds):
 """Intialize explicitly, or from existing ModelCluster.
- Parameters [Explicit creation] - model - Peaks object, or None->empty model - baseline - Baseline object, or None->0 - r_data - Numpy array of r coordinates - y_data - Numpy array of y values - y_error - Numpy array of uncertainties in y - cluster_slice - slice object defining the range of cluster. None->all data - error_method - an ErrorEvaluator subclass - peak_funcs - a sequence of PeakFunction instances - - Parameters [Creation from existing ModelCluster] - model - ModelCluster instance, or sequence of ModelCluster instances + Parameters + ---------- + model : (lists of) ModelCluster instance + The ModelCluster instances to be clustered. + If it is None, then a ModelCluster object is created. + baseline : Baseline object + The Baseline object, if it is None, set to 0. + r_data : array-like + The numpy array of r coordinates + y_data : array-like + The numpy array of y values + y_error : array-like + The numpy array of uncertainties in y + cluster_slice : slice object + The slice object defining the range of cluster. If the input is None, + then it will take the entire range. + error_method : ErrorEvaluator subclass + The error evaluator to use to calculate quality of model to data. + peak_funcs : a sequence of PeakFunction instances + The peak instances to use to calculate the cluster of data. """ self.last_fit_size = 0 self.slice = None @@ -399,7 +476,7 @@ def __init__(self, model, *args, **kwds): self.error_method = orig.error_method self.peak_funcs = list(orig.peak_funcs) return - else: # Explicit creation + else: # Explicit creation if model is None: self.model = Peaks([]) else: @@ -431,7 +508,13 @@ def addexternalpeaks(self, peaks): """Add peaks (and their value) to self. Parameters - peaks - A Peaks object + ---------- + peaks : A Peaks object + The peaks to be added + + Returns + ------- + None """ self.replacepeaks(peaks) self.y_data += peaks.value(self.r_data) @@ -441,8 +524,9 @@ def writestr(self, **kwds): """Return partial string representation. Keywords - pfbaselist - List of peak function bases. Otherwise define list from self. - blfbaselist - List of baseline function bases. Otherwise define list from self. + -------- + pfbaselist - List of peak function bases. Otherwise, define list from self. + blfbaselist - List of baseline function bases.Otherwise, define list from self. 
""" from diffpy.srmise.basefunction import BaseFunction @@ -468,26 +552,26 @@ def writestr(self, **kwds): if self.peak_funcs is None: lines.append("peak_funcs=None") else: - lines.append("peak_funcs=%s" %repr([pfbaselist.index(p) for p in self.peak_funcs])) + lines.append("peak_funcs=%s" % repr([pfbaselist.index(p) for p in self.peak_funcs])) if self.error_method is None: - lines.append('ModelEvaluator=None') + lines.append("ModelEvaluator=None") else: - lines.append('ModelEvaluator=%s' %self.error_method.__name__) + lines.append("ModelEvaluator=%s" % self.error_method.__name__) - lines.append("slice=%s" %repr(self.slice)) + lines.append("slice=%s" % repr(self.slice)) # Indexed baseline functions (unless externally provided) if writeblf: lines.append("## BaselineFunctions") for i, bf in enumerate(blfbaselist): - lines.append('# BaselineFunction %s' %i) + lines.append("# BaselineFunction %s" % i) lines.append(bf.writestr(blfbaselist)) # Indexed peak functions (unless externally provided) if writepf: lines.append("## PeakFunctions") for i, pf in enumerate(pfbaselist): - lines.append('# PeakFunction %s' %i) + lines.append("# PeakFunction %s" % i) lines.append(pf.writestr(pfbaselist)) lines.append("# BaselineObject") @@ -501,17 +585,16 @@ def writestr(self, **kwds): lines.append("None") else: for m in self.model: - lines.append('# ModelPeak') + lines.append("# ModelPeak") lines.append(m.writestr(pfbaselist)) # Raw data in modelcluster. - lines.append('### start data') - lines.append('#L r y dy') + lines.append("### start data") + lines.append("#L r y dy") for i in range(len(self.r_data)): - lines.append('%g %g %g' % \ - (self.r_data[i], self.y_data[i], self.y_error[i]) ) + lines.append("%g %g %g" % (self.r_data[i], self.y_data[i], self.y_error[i])) - datastring = "\n".join(lines)+"\n" + datastring = "\n".join(lines) + "\n" return datastring @staticmethod @@ -519,8 +602,9 @@ def factory(mcstr, **kwds): """Create ModelCluster from string. 
Keywords - pfbaselist - List of peak function bases - blfbaselist - List of baseline function bases + -------- + pfbaselist : List of peak function bases + blfbaselist : List of baseline function bases """ from diffpy.srmise.basefunction import BaseFunction @@ -545,102 +629,97 @@ def factory(mcstr, **kwds): # - StartData # find data section, and what information it contains - res = re.search(r'^#+ start data\s*(?:#.*\s+)*', mcstr, re.M) + res = re.search(r"^#+ start data\s*(?:#.*\s+)*", mcstr, re.M) if res: - start_data = mcstr[res.end():].strip() - start_data_info = mcstr[res.start():res.end()] - header = mcstr[:res.start()] - res = re.search(r'^(#+L.*)$', start_data_info, re.M) + start_data = mcstr[res.end() :].strip() + start_data_info = mcstr[res.start() : res.end()] + header = mcstr[: res.start()] + res = re.search(r"^(#+L.*)$", start_data_info, re.M) if res: - start_data_info = start_data_info[res.start():res.end()].strip() + start_data_info = start_data_info[res.start() : res.end()].strip() hasr = False hasy = False hasdy = False - res = re.search(r'\br\b', start_data_info) + res = re.search(r"\br\b", start_data_info) if res: hasr = True - res = re.search(r'\by\b', start_data_info) + res = re.search(r"\by\b", start_data_info) if res: hasy = True - res = re.search(r'\bdy\b', start_data_info) + res = re.search(r"\bdy\b", start_data_info) if res: hasdy = True # Model - res = re.search(r'^#+ ModelPeaks.*$', header, re.M) + res = re.search(r"^#+ ModelPeaks.*$", header, re.M) if res: - model_peaks = header[res.end():].strip() - header = header[:res.start()] + model_peaks = header[res.end() :].strip() + header = header[: res.start()] # Baseline Object - res = re.search(r'^#+ BaselineObject\s*(?:#.*\s+)*', header, re.M) + res = re.search(r"^#+ BaselineObject\s*(?:#.*\s+)*", header, re.M) if res: - baselineobject = header[res.end():].strip() - header = header[:res.start()] + baselineobject = header[res.end() :].strip() + header = header[: res.start()] # Peak functions if readpf: - res = re.search(r'^#+ PeakFunctions.*$', header, re.M) + res = re.search(r"^#+ PeakFunctions.*$", header, re.M) if res: - peakfunctions = header[res.end():].strip() - header = header[:res.start()] + peakfunctions = header[res.end() :].strip() + header = header[: res.start()] # Baseline functions if readblf: - res = re.search(r'^#+ BaselineFunctions.*$', header, re.M) + res = re.search(r"^#+ BaselineFunctions.*$", header, re.M) if res: - baselinefunctions = header[res.end():].strip() - header = header[:res.start()] + baselinefunctions = header[res.end() :].strip() + header = header[: res.start()] - ### Instantiating baseline functions + # Instantiating baseline functions if readblf: blfbaselist = [] - res = re.split(r'(?m)^#+ BaselineFunction \d+\s*(?:#.*\s+)*', baselinefunctions) + res = re.split(r"(?m)^#+ BaselineFunction \d+\s*(?:#.*\s+)*", baselinefunctions) for s in res[1:]: blfbaselist.append(BaseFunction.factory(s, blfbaselist)) - ### Instantiating peak functions + # Instantiating peak functions if readpf: pfbaselist = [] - res = re.split(r'(?m)^#+ PeakFunction \d+\s*(?:#.*\s+)*', peakfunctions) + res = re.split(r"(?m)^#+ PeakFunction \d+\s*(?:#.*\s+)*", peakfunctions) for s in res[1:]: pfbaselist.append(BaseFunction.factory(s, pfbaselist)) - - ### Instantiating header data + # Instantiating header data # peak_funcs - res = re.search(r'^peak_funcs=(.*)$', header, re.M) + res = re.search(r"^peak_funcs=(.*)$", header, re.M) peak_funcs = eval(res.groups()[0].strip()) if peak_funcs is not None: peak_funcs = 
[pfbaselist[i] for i in peak_funcs] # error_method - res = re.search(r'^ModelEvaluator=(.*)$', header, re.M) + res = re.search(r"^ModelEvaluator=(.*)$", header, re.M) __import__("diffpy.srmise.modelevaluators") module = sys.modules["diffpy.srmise.modelevaluators"] error_method = getattr(module, res.groups()[0].strip()) # slice - res = re.search(r'^slice=(.*)$', header, re.M) + res = re.search(r"^slice=(.*)$", header, re.M) cluster_slice = eval(res.groups()[0].strip()) - - ### Instantiating BaselineObject - if re.match(r'^None$', baselineobject): + # Instantiating BaselineObject + if re.match(r"^None$", baselineobject): baseline = None else: baseline = Baseline.factory(baselineobject, blfbaselist) - - ### Instantiating model + # Instantiating model model = Peaks() - res = re.split(r'(?m)^#+ ModelPeak\s*(?:#.*\s+)*', model_peaks) + res = re.split(r"(?m)^#+ ModelPeak\s*(?:#.*\s+)*", model_peaks) for s in res[1:]: model.append(Peak.factory(s, pfbaselist)) - - - ### Instantiating start data + # Instantiating start data # read actual data - r, y, dy arrays = [] if hasr: @@ -661,12 +740,13 @@ def factory(mcstr, **kwds): # raise SrMiseDataFormatError if something goes wrong try: for line in start_data.split("\n"): - l = line.split() - if len(arrays) != len(l): - emsg = ("Number of value fields does not match that given by '%s'" %start_data_info) + lines = line.split() + if len(arrays) != len(lines): + emsg = "Number of value fields does not match that given by '%s'" % start_data_info + raise IndexError(emsg) for a, v in zip(arrays, line.split()): a.append(float(v)) - except (ValueError, IndexError), err: + except (ValueError, IndexError) as err: raise SrMiseDataFormatError(err) if hasr: r_data = np.array(r_data) @@ -675,8 +755,16 @@ def factory(mcstr, **kwds): if hasdy: y_error = np.array(y_error) - return ModelCluster(model, baseline, r_data, y_data, y_error, cluster_slice, error_method, peak_funcs) - + return ModelCluster( + model, + baseline, + r_data, + y_data, + y_error, + cluster_slice, + error_method, + peak_funcs, + ) @staticmethod def join_adjacent(m1, m2): @@ -687,8 +775,16 @@ def join_adjacent(m1, m2): unchanged. Parameters - m1 - A ModelCluster - m2 - A ModelCluster + ---------- + m1 : ModelCluster instance + The first ModelCluster instance. + m2 : ModelCluster instance + The second ModelCluster instance. + + Returns + ------- + ModelCluster instance + The new ModelCluster instance between m1 and m2. """ # Check for members that must be shared. if not (m1.r_data is m2.r_data): @@ -724,11 +820,11 @@ def join_adjacent(m1, m2): if not right_ids[0] == left_ids[1]: raise ValueError("Given ModelClusters are not adjacent.") - new_slice=slice(left_ids[0], right_ids[1], 1) + new_slice = slice(left_ids[0], right_ids[1], 1) # Approximately where the clusters meet. - border_x = .5*(left.r_data[left_ids[1]-1] + right.r_data[right_ids[0]]) - border_y = .5*(left.y_data[left_ids[1]-1] + right.y_data[right_ids[0]]) + border_x = 0.5 * (left.r_data[left_ids[1] - 1] + right.r_data[right_ids[0]]) + border_y = 0.5 * (left.y_data[left_ids[1] - 1] + right.y_data[right_ids[0]]) if len(m1.model) > 0 and len(m2.model) > 0: new_model = left.model.copy() @@ -741,27 +837,45 @@ def join_adjacent(m1, m2): # border_x are removed. The highly unlikely case of two peaks # exactly at the border is also handled. 
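
join_adjacent requires that the two slices over the shared r_data touch exactly, merges them into one slice, and takes the nominal border as the midpoint between the last point of the left cluster and the first point of the right one. The bookkeeping reduces to (hypothetical values)::

    import numpy as np

    r_data = np.linspace(0.0, 10.0, 11)
    left, right = slice(0, 5), slice(5, 9)  # adjacent: left.stop == right.start
    assert left.stop == right.start
    new_slice = slice(left.start, right.stop)
    border_x = 0.5 * (r_data[left][-1] + r_data[right][0])
    print(new_slice, border_x)              # slice(0, 9, None) 4.5
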
for i in reversed(range(len(new_model))):
-                if new_model[i]["position"] == border_x and \
-                   i > 0 and new_model[i-1]["position"] == border_x:
+ if new_model[i]["position"] == border_x and i > 0 and new_model[i - 1]["position"] == border_x:
 del new_model[i]
 elif new_ids[i] != i:
-                    if (new_model[i]["position"] > border_x and new_ids[i] < len(left.model)) and \
-                       (new_model[i]["position"] < border_x and new_ids[i] >= len(left.model)):
+ if (new_model[i]["position"] > border_x and new_ids[i] < len(left.model)) or (
+ new_model[i]["position"] < border_x and new_ids[i] >= len(left.model)
+ ):
 del new_model[i]
 
 # Likely to improve any future fitting
 new_model.match_at(border_x, border_y)
 elif len(m1.model) > 0:
 new_model = m1.model.copy()
-        else: # Only m2 has entries, or both are empty
+ else:  # Only m2 has entries, or both are empty
 new_model = m2.model.copy()
 
-        peak_funcs = list(set(m1.peak_funcs) | set(m2.peak_funcs)) # "Union"
-        return ModelCluster(new_model, m1.baseline, m1.r_data, m1.y_data,
-            m1.y_error, new_slice, m1.error_method, peak_funcs)
+ peak_funcs = list(set(m1.peak_funcs) | set(m2.peak_funcs))  # "Union"
+ return ModelCluster(
+ new_model,
+ m1.baseline,
+ m1.r_data,
+ m1.y_data,
+ m1.y_error,
+ new_slice,
+ m1.error_method,
+ peak_funcs,
+ )
 
 def change_slice(self, new_slice):
-        """Change the slice which represents the extent of a cluster."""
+ """Change the slice which represents the extent of a cluster.
+
+ Parameters
+ ----------
+ new_slice : slice object
+ The new slice to change.
+
+ Returns
+ -------
+ None
+ """
 old_slice = self.slice
 self.slice = new_slice
 self.r_cluster = self.r_data[new_slice]
@@ -798,22 +912,35 @@ def npars(self, count_baseline=True, count_fixed=True):
 """Return number of parameters in model and baseline.
 
 Parameters
-        count_baseline - [True] Boolean determines whether or not to count
-            parameters from baseline.
-        count_fixed - [True] Boolean determines whether or not to include
-            non-free parameters.
+ ----------
+ count_baseline : bool
+ The boolean determines whether to count parameters from baseline. Default is True.
+ count_fixed : bool
+ The boolean determines whether to include non-free parameters. Default is True.
+
+ Returns
+ -------
+ n : int
+ The number of parameters in model and baseline.
 """
 n = self.model.npars(count_fixed=count_fixed)
 if count_baseline and self.baseline is not None:
 n += self.baseline.npars(count_fixed=count_fixed)
 return n
 
-    def replacepeaks(self, newpeaks, delslice=slice(0,0)):
+ def replacepeaks(self, newpeaks, delslice=slice(0, 0)):
 """Replace peaks given by delslice by those in newpeaks.
 
 Parameters
-        newpeaks - Add each Peak in this Peaks to cluster.
-        delslice - Existing peaks given by slice object are deleted.
+ ----------
+ newpeaks : Peaks instance
+ The peaks to be added to the cluster.
+ delslice : slice object
+ The existing peaks given by the slice object are deleted.
+
+ Returns
+ -------
+ None
 """
 for p in self.model[delslice]:
 if not p.removable:
@@ -823,12 +950,28 @@ def replacepeaks(self, newpeaks, delslice=slice(0,0)):
 return
 
 def deletepeak(self, idx):
-        """Delete the peak at the given index."""
-        self.replacepeaks([], slice(idx,idx+1))
+ """Delete the peak at the given index.
+
+ Parameters
+ ----------
+ idx : int
+ Index of peak to delete.
+
+ Returns
+ -------
+ None
+ """
+ self.replacepeaks([], slice(idx, idx + 1))
 
 def estimatepeak(self):
-        """Attempt to add single peak to empty cluster. Return True if successful."""
-        ### STUB!!!
### + """Attempt to add single peak to empty cluster. Return True if successful. + + Returns + ------- + bool + True if successful, False otherwise. + """ + # STUB!!! ### # Currently only a single peak function is supported. Dynamic # selection from multiple types may require additional support # within peak functions themselves. The simplest method would @@ -842,40 +985,57 @@ def estimatepeak(self): selected = self.peak_funcs[0] estimate = selected.estimate_parameters(self.r_cluster, self.y_cluster - self.valuebl()) - if estimate is not None: newpeak = selected.actualize(estimate, "internal") - logger.info("Estimate: %s" %newpeak) + logger.info("Estimate: %s" % newpeak) self.replacepeaks(Peaks([newpeak])) return True else: return False - def fit(self, justify=False, ntrials=0, fitbaseline=False, estimate=True, cov=None, cov_format="default_output"): + def fit( + self, + justify=False, + ntrials=0, + fitbaseline=False, + estimate=True, + cov=None, + cov_format="default_output", + ): """Perform a chi-square fit of the model to data in cluster. Parameters - justify - Revert to initial model (if one exists) if new model - has only a single peak and the quality of the fit suggests - additional peaks are present. - ntrials - The maximum number of function evaluations. - '0' indicates the fitting algorithm's default. - fitbaseline - Whether to fit baseline along with peaks - estimate - Estimate a single peak from data if model is empty. - cov - Optional ModelCovariance object preserves covariance information. - cov_format - Parameterization to use in cov. - + ---------- + justify : bool + Revert to initial model (if one exists) if new model + has only a single peak and the quality of the fit suggests + additional peaks are present. Default is False. + ntrials : int + The maximum number of function evaluations. + '0' indicates the fitting algorithm's default. + fitbaseline : bool + Whether to fit baseline along with peaks. Default is False. + estimate : bool + Estimate a single peak from data if model is empty. Default is True. + cov : ModelCovariance or None + Optional ModelCovariance object preserves covariance information. + cov_format : str + Parameterization to use in cov. + + Returns + ------- + ModelEvaluator or None If fitting changes a model, return ModelEvaluator instance. Otherwise return None. """ if self.never_fit: return None if len(self.model) == 0: - #Attempt to add a first peak to the cluster + # Attempt to add a first peak to the cluster if estimate: try: self.estimatepeak() - except SrMiseEstimationError, e: + except SrMiseEstimationError: logger.info("Fit: No model to fit, estimation not possible.") return else: @@ -899,14 +1059,16 @@ def fit(self, justify=False, ntrials=0, fitbaseline=False, estimate=True, cov=No fmodel = self.model try: - fmodel.fit(self.r_data, - y_datafit, - self.y_error, - self.slice, - ntrials, - cov, - cov_format) - except SrMiseFitError, e: + fmodel.fit( + self.r_data, + y_datafit, + self.y_error, + self.slice, + ntrials, + cov, + cov_format, + ) + except SrMiseFitError as e: logger.debug("Error while fitting cluster: %s\nReverting to original model.", e) self.model = orig_model self.baseline = orig_baseline @@ -925,17 +1087,15 @@ def fit(self, justify=False, ntrials=0, fitbaseline=False, estimate=True, cov=No # Test for fit improvement if new_qual < orig_qual: # either fit blew up (and leastsq didn't notice) or the fit had already converged. 
- msg = ["ModelCluster.fit() warning: fit seems not to have improved.", - "Reverting to original model.", - "----------", - "New Quality: %s", - "Original Quality: %s" - "%s", - "----------"] - logger.debug("\n".join(msg), - new_qual.stat, - orig_qual.stat, - self.model) + msg = [ + "ModelCluster.fit() warning: fit seems not to have improved.", + "Reverting to original model.", + "----------", + "New Quality: %s", + "Original Quality: %s" "%s", + "----------", + ] + logger.debug("\n".join(msg), new_qual.stat, orig_qual.stat, self.model) self.model = orig_model self.baseline = orig_baseline @@ -953,9 +1113,11 @@ def fit(self, justify=False, ntrials=0, fitbaseline=False, estimate=True, cov=No # original fit is less likely to obscure any hidden peaks. if justify and len(self.model) == 1 and len(orig_model) > 0: min_npars = min([p.npars for p in self.peak_funcs]) - if new_qual.growth_justified(self, min_npars): - msg = ["ModelCluster.fit(): Fit over current cluster better explained by additional peaks.", - "Reverting to original model."] + if new_qual.growth_justified(self, min_npars): + msg = [ + "ModelCluster.fit(): Fit over current cluster better explained by additional peaks.", + "Reverting to original model.", + ] logger.debug("\n".join(msg)) self.model = orig_model @@ -967,35 +1129,44 @@ def contingent_fit(self, minpoints, growth_threshold): """Fit cluster if it has grown sufficiently large since its last fit. Parameters - minpoints - The minimum number of points an empty cluster requires to fit. - growth_threshold - Fit non-empty model if (currentsize/oldsize) >= this value. - - Return ModelEvaluator instance if fit changed, otherwise None. + ---------- + minpoints : int + The minimum number of points an empty cluster requires to fit. + growth_threshold : float + Fit non-empty model if (currentsize/oldsize) >= this value. + + Returns + ------- + ModelEvaluator or None + Return ModelEvaluator instance if fit changed, otherwise None. """ if self.never_fit: return None - if (self.last_fit_size > 0 and float(self.size)/self.last_fit_size >= growth_threshold) \ - or (self.last_fit_size == 0 and self.size >= minpoints): + if (self.last_fit_size > 0 and float(self.size) / self.last_fit_size >= growth_threshold) or ( + self.last_fit_size == 0 and self.size >= minpoints + ): return self.fit(justify=True) return None def cleanfit(self): """Remove poor-quality peaks in the fit. Return number removed.""" - #Find peaks located outside the cluster + # Find peaks located outside the cluster pos = np.array([p["position"] for p in self.model]) left_idx = pos.searchsorted(self.r_cluster[0]) right_idx = pos.searchsorted(self.r_cluster[-1]) outside_idx = range(0, left_idx) outside_idx.extend(range(right_idx, len(self.model))) - #inside_idx = range(left_idx, right_idx) + # inside_idx = range(left_idx, right_idx) # Identify outside peaks that contribute < error everywhere in cluster. # Must check entire cluster and not just nearest endpoint because not # every peak function can be assumed to have its greatest contribution # there, and errors are not necessarily constant. - outside_idx = [i for i in outside_idx \ - if (self.model[i].removable \ - and max(self.model[i].value(self.r_cluster) - self.error_cluster) < 0)] + outside_idx = [ + i + for i in outside_idx + if (self.model[i].removable and max(self.model[i].value(self.r_cluster) - self.error_cluster) < 0) + ] # TODO: Check for peaks that have blown up. # Remember to check if a peak is removable. 
@@ -1015,11 +1186,11 @@ def cleanfit(self): msg.extend([str(self.model[i]) for i in nan_idx]) logger.debug("\n".join(msg)) -# # TODO: Uncomment when there's a point! -# if len(blown_idx) > 0: -# msg = ["Following peaks inside cluster were too large and had to be removed:"] -# msg.extend([str(self.model[i]) for i in blown_idx]) -# logger.info("\n".join(msg)) + # # TODO: Uncomment when there's a point! + # if len(blown_idx) > 0: + # msg = ["Following peaks inside cluster were too large and had to be removed:"] + # msg.extend([str(self.model[i]) for i in blown_idx]) + # logger.info("\n".join(msg)) # A peak can only be removed once. to_remove = list(set(outside_idx) | set(blown_idx) | set(nan_idx)) @@ -1039,16 +1210,22 @@ def reduce_to(self, x, y): a maximum very close to x may prevent optimal results. Parameters - x - Position at which to match - y - Height to match. - - Return ModelEvaluator instance if fit changed, otherwise None.""" + ---------- + x : array-like + The position at which to match + y : array-like + The height to match. + + Returns + ------- + ModelEvaluator or None + Return ModelEvaluator instance if fit changed, otherwise None.""" # No reduction neccessary if self.model.value(x) < y: logger.debug("reduce_to: No reduction necessary.") return None orig_model = self.model.copy() - self.model.match_at(x, y-self.valuebl(x)) + self.model.match_at(x, y - self.valuebl(x)) quality = self.fit() # Did reduction help? @@ -1066,14 +1243,26 @@ def reduce_to(self, x, y): return quality def value(self, r=None): - """Return value of baseline+model over cluster.""" - if len(self.model)==0: + """Return value of baseline+model over cluster. + + Parameters + ---------- + r : array-like, optional + value(s) over which to calculate the baseline's value. + The default is over the entire cluster. + + Returns + ------- + float + The value of baseline+model over cluster. + """ + if len(self.model) == 0: return self.valuebl(r) else: if r is None: - return self.valuebl(r)+(self.model.value(self.r_data, self.slice)[self.slice]) + return self.valuebl(r) + (self.model.value(self.r_data, self.slice)[self.slice]) else: - return self.valuebl(r)+(self.model.value(r)) + return self.valuebl(r) + (self.model.value(r)) def valuebl(self, r=None): """Return baseline's value over cluster. @@ -1081,17 +1270,23 @@ def valuebl(self, r=None): If no baseline exists its value is 0 everywhere. Parameters + ---------- r - value(s) over which to calculate the baseline's value. The default is over the entire cluster. + + Returns + ------- + float + The value of baseline's value. """ if self.baseline is None: if r is None: return np.zeros(self.size) else: - return r*0. + return r * 0.0 else: if r is None: - return (self.baseline.value(self.r_data, self.slice)[self.slice]) + return self.baseline.value(self.r_data, self.slice)[self.slice] else: return self.baseline.value(r) @@ -1107,10 +1302,18 @@ def quality(self, evaluator=None, **kwds): details see ModelEvaluator documentation. Parameters - evaluator - A ModelEvaluator class (not instance) to use instead of default. + ---------- + evaluator : ModelEvaluator class or None + The ModelEvaluator class to use. Default is None. Keywords + -------- kwds - Keyword arguments passed the the ModelEvaluator's evaluate() method. 
+ + Returns + ------- + ModelEvaluator instance + The ModelEvaluator instance with quality calculated """ if evaluator is None: evaluator_inst = self.error_method() @@ -1123,42 +1326,60 @@ def plottable(self, joined=False): """Return sequence suitable for plotting cluster model+baseline with matplotlib. Parameters - joined - Return sum of all peaks, or each one individually. + ---------- + joined : bool + Return sum of all peaks if joined is True, or each one individually if False. + + Returns + ------- + array-like + A sequence of plottable objects. """ if joined: return [self.r_cluster, self.y_cluster, self.r_cluster, self.value()] else: toreturn = [self.r_cluster, self.y_cluster] bl = self.valuebl() - toreturn.extend([self.r_cluster for i in range(2*len(self.model))]) + toreturn.extend([self.r_cluster for i in range(2 * len(self.model))]) for i, p in enumerate(self.model): - toreturn[2*i+3] = bl + p.value(self.r_data, self.slice)[self.slice] + toreturn[2 * i + 3] = bl + p.value(self.r_data, self.slice)[self.slice] return toreturn def plottable_residual(self): - """Return sequence suitable for plotting cluster residual with matplotlib.""" + """Return sequence suitable for plotting cluster residual with matplotlib. + + Returns + ------- + array-like + A sequence of plottable clusters and residuals. + """ return [self.r_cluster, self.residual()] def augment(self, source): """Add peaks from another ModelCluster that improve this one's quality. Parameters - source - A ModelCluster instance + ---------- + source : ModelCluster instance + The ModelCluster instance to augment the model's quality. + + Returns + ------- + None """ best_model = self.model.copy() best_qual = self.quality() source_model = source.model.copy() - msg = ["==== Augmenting model ====", - "Original fit:", - "%s", - "w/ quality: %s", - "New model fits:", - "%s"] - logger.debug("\n".join(msg), - best_model, - best_qual.stat, - source_model) + msg = [ + "==== Augmenting model ====", + "Original fit:", + "%s", + "w/ quality: %s", + "New model fits:", + "%s", + ] + logger.debug("\n".join(msg), best_model, best_qual.stat, source_model) # Each iteration improves best_model by adding the peak from # source_model to best_model that most improves its quality, breaking @@ -1181,28 +1402,27 @@ def augment(self, source): best_model = test_models[args[-1]] del source_model[args[-1]] else: - break # Best possible model has been found. + break # Best possible model has been found. self.replacepeaks(best_model, slice(len(self.model))) # TODO: Do I need this? If test_model contains peaks # by reference, the fit peaks will change as well. self.fit() - msg = ["Best model after fit is:", - "%s", - "w/ quality: %s", - "================="] - logger.debug("\n".join(msg), - self.model, - best_qual.stat) + msg = ["Best model after fit is:", "%s", "w/ quality: %s", "================="] + logger.debug("\n".join(msg), self.model, best_qual.stat) return def __str__(self): """Return string representation of the cluster.""" - return '\n'.join(["Slice: %s" %self.slice, - "Quality: %s" %self.quality().stat, - "Baseline: %s" %self.baseline, - "Peaks:\n%s" %self.model]) + return "\n".join( + [ + "Slice: %s" % self.slice, + "Quality: %s" % self.quality().stat, + "Baseline: %s" % self.baseline, + "Peaks:\n%s" % self.model, + ] + ) def prune(self): """Remove peaks until model quality no longer improves. 
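# --- A usage sketch (not part of this patch) for the plotting helpers above;
# `cluster` stands in for a fitted ModelCluster instance. plottable() returns
# alternating x, y sequences, which is exactly the argument layout that
# pyplot.plot() accepts.
import matplotlib.pyplot as plt

# plt.plot(*cluster.plottable(joined=True))   # data and summed model
# plt.plot(*cluster.plottable())              # data and each peak over the baseline
# plt.plot(*cluster.plottable_residual())     # data minus model
# plt.show()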
@@ -1223,10 +1443,19 @@ def prune(self): tracer.pushc() y_nobl = self.y_cluster - self.valuebl() - prune_mc = ModelCluster(None, None, self.r_cluster, y_nobl, self.error_cluster, None, self.error_method, self.peak_funcs) + prune_mc = ModelCluster( + None, + None, + self.r_cluster, + y_nobl, + self.error_cluster, + None, + self.error_method, + self.peak_funcs, + ) orig_model = self.model.copy() - peak_range = 3 # number of peaks on either side of deleted peak to fit + peak_range = 3 # number of peaks on either side of deleted peak to fit check_models = [] for m in orig_model: if m.removable: @@ -1237,16 +1466,11 @@ def prune(self): best_model = self.model.copy() best_qual = self.quality() - msg = ["====Pruning fits:====", - "Original model:", - "%s", - "w/ quality: %s"] - logger.info("\n".join(msg), - best_model, - best_qual.stat) + msg = ["====Pruning fits:====", "Original model:", "%s", "w/ quality: %s"] + logger.info("\n".join(msg), best_model, best_qual.stat) - #### Main prune loop #### - while(check_models.count(None) < len(check_models)): + # Main prune loop #### + while check_models.count(None) < len(check_models): # Cache value of individual peaks for best current model. best_modely = [] @@ -1271,23 +1495,17 @@ def prune(self): for i in range(len(check_models)): if check_models[i] is not None: # Create model with ith peak removed, and distant peaks effectively fixed - lo = max(i-peak_range, 0) - hi = min(i+peak_range+1, len(best_model)) + lo = max(i - peak_range, 0) + hi = min(i + peak_range + 1, len(best_model)) check_models[i] = best_model[lo:i].copy() - check_models[i].extend(best_model[i+1:hi].copy()) + check_models[i].extend(best_model[i + 1 : hi].copy()) prune_mc.model = check_models[i] - msg = ["len(check_models): %s", - "len(best_model): %s", - "i: %s"] - logger.debug("\n".join(msg), - len(check_models), - len(best_model), - i) + msg = ["len(check_models): %s", "len(best_model): %s", "i: %s"] + logger.debug("\n".join(msg), len(check_models), len(best_model), i) addpars = best_model.npars() - check_models[i].npars() - best_model[i].npars(count_fixed=False) - # Remove contribution of (effectively) fixed peaks y = np.array(y_nobl) if lo > 0: @@ -1298,30 +1516,23 @@ def prune(self): prune_mc.y_data = y prune_mc.y_cluster = y - msg = ["", - "--- %s ---", - "Removed peak: %s", - "Starting model:", - "%s"] - logger.debug("\n".join(msg), - i, - best_model[i], - prune_mc.model) + msg = [ + "", + "--- %s ---", + "Removed peak: %s", + "Starting model:", + "%s", + ] + logger.debug("\n".join(msg), i, best_model[i], prune_mc.model) - prune_mc.fit(ntrials=int(np.sqrt(len(y))+50), estimate=False) + prune_mc.fit(ntrials=int(np.sqrt(len(y)) + 50), estimate=False) qual = prune_mc.quality(kshift=addpars) check_qual = np.append(check_qual, qual) check_qualidx = np.append(check_qualidx, i) - msg = ["Found model:", - "%s", - "addpars: %s", - "qual: %s"] - logger.debug("\n".join(msg), - prune_mc.model, - addpars, - qual.stat) + msg = ["Found model:", "%s", "addpars: %s", "qual: %s"] + logger.debug("\n".join(msg), prune_mc.model, addpars, qual.stat) # Do not check this peak in the future if quality decreased. 
if qual < best_qual: @@ -1329,21 +1540,20 @@ def prune(self): arg = check_qual.argsort() - msg = [" - Finished round of pruning -", - "best_qual: %s", - "check_qual: %s", - "sorted check_qual: %s"] - logger.debug("\n".join(msg), - best_qual.stat, - [c.stat for c in check_qual], - arg) + msg = [ + " - Finished round of pruning -", + "best_qual: %s", + "check_qual: %s", + "sorted check_qual: %s", + ] + logger.debug("\n".join(msg), best_qual.stat, [c.stat for c in check_qual], arg) arg = arg[-1] newbest_qual = check_qual[arg] newbest_qualidx = check_qualidx[arg] if newbest_qual > best_qual: - lo = max(newbest_qualidx-peak_range, 0) - hi = min(newbest_qualidx+peak_range+1, len(orig_model)) + lo = max(newbest_qualidx - peak_range, 0) + hi = min(newbest_qualidx + peak_range + 1, len(orig_model)) bmtemp = best_model[:lo] bmtemp.extend(check_models[newbest_qualidx]) bmtemp.extend(best_model[hi:]) @@ -1360,12 +1570,8 @@ def prune(self): self.model = best_model tracer.emit(self) - msg = ["New best model:", - "%s", - "best_qual: %s"] - logger.debug("\n".join(msg), - best_model, - best_qual.stat) + msg = ["New best model:", "%s", "best_qual: %s"] + logger.debug("\n".join(msg), best_model, best_qual.stat) if len(best_model) > 0: del check_models[newbest_qualidx] @@ -1378,48 +1584,49 @@ def prune(self): else: break - msg = ["Best model after pruning is:", - "%s", - "w/ quality: %s", - "================="] - logger.info("\n".join(msg), - self.model, - self.quality().stat) + msg = [ + "Best model after pruning is:", + "%s", + "w/ quality: %s", + "=================", + ] + logger.info("\n".join(msg), self.model, self.quality().stat) tracer.popc() return + # simple test code -if __name__ == '__main__': +if __name__ == "__main__": from numpy.random import randn - from diffpy.srmise.modelevaluators import AICc - from diffpy.srmise.peaks import GaussianOverR + from diffpy.srmise.modelevaluators.aicc import AICc + from diffpy.srmise.peaks.gaussianoverr import GaussianOverR - pf = GaussianOverR(.7) - res = .01 + pf = GaussianOverR(0.7) + res = 0.01 - pars = [[3, .2, 10], [3.5, .2, 10]] + pars = [[3, 0.2, 10], [3.5, 0.2, 10]] ideal_peaks = Peaks([pf.actualize(p, "pwa") for p in pars]) - r = np.arange(2,4,res) + r = np.arange(2, 4, res) y = ideal_peaks.value(r) + randn(len(r)) err = np.ones(len(r)) evaluator = AICc() - guesspars = [[2.9, .15, 5], [3.6, .3, 5]] + guesspars = [[2.9, 0.15, 5], [3.6, 0.3, 5]] guess_peaks = Peaks([pf.actualize(p, "pwa") for p in guesspars]) cluster = ModelCluster(guess_peaks, None, r, y, err, None, AICc, [pf]) - print "--- Actual Peak parameters ---" - print ideal_peaks + print("--- Actual Peak parameters ---") + print(ideal_peaks) - print "\n--- Before fit ---" - print cluster + print("\n--- Before fit ---") + print(cluster) cluster.fit() - print "\n--- After fit ---" - print cluster + print("\n--- After fit ---") + print(cluster) diff --git a/diffpy/srmise/peaks/__init__.py b/src/diffpy/srmise/modelevaluators/__init__.py similarity index 63% rename from diffpy/srmise/peaks/__init__.py rename to src/diffpy/srmise/modelevaluators/__init__.py index d3c7ee3..5540acb 100644 --- a/diffpy/srmise/peaks/__init__.py +++ b/src/diffpy/srmise/modelevaluators/__init__.py @@ -2,7 +2,8 @@ ############################################################################## # # SrMise by Luke Granlund -# (c) 2014 trustees of the Michigan State University. +# (c) 2014 trustees of the Michigan State University +# (c) 2024 trustees of Columia University in the City of New York # All rights reserved. 
# # File coded by: Luke Granlund @@ -10,10 +11,3 @@ # See LICENSE.txt for license information. # ############################################################################## - -__all__ = ["base", "gaussian", "gaussianoverr", "terminationripples"] - -from base import Peak, Peaks -from gaussian import Gaussian -from gaussianoverr import GaussianOverR -from terminationripples import TerminationRipples diff --git a/diffpy/srmise/modelevaluators/aic.py b/src/diffpy/srmise/modelevaluators/aic.py similarity index 51% rename from diffpy/srmise/modelevaluators/aic.py rename to src/diffpy/srmise/modelevaluators/aic.py index 915cace..1a1973b 100644 --- a/diffpy/srmise/modelevaluators/aic.py +++ b/src/diffpy/srmise/modelevaluators/aic.py @@ -2,7 +2,8 @@ ############################################################################## # # SrMise by Luke Granlund -# (c) 2014 trustees of the Michigan State University. +# (c) 2014 trustees of the Michigan State University +# (c) 2024 trustees of Columbia University in the City of New York # All rights reserved. # # File coded by: Luke Granlund @@ -15,13 +16,13 @@ import numpy as np -import diffpy.srmise.srmiselog from diffpy.srmise.modelevaluators.base import ModelEvaluator from diffpy.srmise.srmiseerrors import SrMiseModelEvaluatorError logger = logging.getLogger("diffpy.srmise") -class AIC (ModelEvaluator): + +class AIC(ModelEvaluator): """Evaluate and compare models with the AIC statistic. Akaike's Information Criterion (AIC) is a method for comparing statistical @@ -52,11 +53,20 @@ def __init__(self): def evaluate(self, fit, count_fixed=False, kshift=0): """Return quality of fit for given ModelCluster using AIC (Akaike's Information Criterion). - Parameters - fit: A ModelCluster - count_fixed: Whether fixed parameters are considered. - kshift: (0) Treat the model has having this many additional - parameters. Negative values also allowed.""" + Parameters + ---------- + fit : ModelCluster instance + The ModelCluster instance to evaluate. + count_fixed : bool + Whether fixed parameters are considered. Default is False. + kshift : int + Treat the model has having this many additional + parameters. Negative values also allowed. Default is 0. + + Returns + ------- + quality : float + The quality of fit for given ModelCluster.""" # Number of parameters. By default, fixed parameters are ignored. k = fit.model.npars(count_fixed=count_fixed) + kshift if k < 0: @@ -67,82 +77,143 @@ def evaluate(self, fit, count_fixed=False, kshift=0): n = fit.size if n < self.minpoints(k): - logger.warn("AIC.evaluate(): too few data to evaluate quality reliably.") + logger.warning("AIC.evaluate(): too few data to evaluate quality reliably.") n = self.minpoints(k) - if self.chisq == None: + if self.chisq is None: self.chisq = self.chi_squared(fit.value(), fit.y_cluster, fit.error_cluster) self.stat = self.chisq + self.parpenalty(k, n) return self.stat - def minpoints(self, npars): - """Calculates the minimum number of points required to make an estimate of a model's quality.""" + """Calculates the minimum number of points required to make an estimate of a model's quality. + + Parameters + ---------- + npars : int + The number of parameters in the model. + + Returns + ------- + int + The minimum number of points required to make an estimate of a model's quality. 
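# --- AIC in miniature (a sketch, not part of this patch): with chi-squared
# standing in for -2*log-likelihood up to an additive constant, the statistic
# computed by evaluate() above is chisq + 2k, and the model with the smaller
# value is preferred.
def aic_stat(chisq, k):
    return chisq + 2 * k

# A 3-parameter fit with chisq 210 beats a 6-parameter fit with chisq 205:
assert aic_stat(210.0, 3) < aic_stat(205.0, 6)   # 216 < 217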
+        """
         return 1
 
     def parpenalty(self, k, n):
-        """Returns the cost for adding k parameters to the current model cluster."""
+        """Return the cost for adding k parameters to the current model cluster.
+
+        Parameters
+        ----------
+        k : int
+            The number of added parameters in the model.
+        n : int
+            The number of data points. Unused by the AIC penalty, but
+            accepted so the signature matches AICc.parpenalty(k, n).
 
-        #Weight the penalty for additional parameters.
-        #If this isn't 1 there had better be a good reason.
-        fudgefactor = 1.
+        Returns
+        -------
+        float
+            The penalty cost for adding k parameters to the current model cluster.
+        """
 
-        return (2*k)*fudgefactor
+        # Weight the penalty for additional parameters.
+        # If this isn't 1 there had better be a good reason.
+        fudgefactor = 1.0
+
+        return (2 * k) * fudgefactor
 
     def growth_justified(self, fit, k_prime):
-        """Returns whether adding k_prime parameters to the given model (ModelCluster) is justified given the current quality of the fit.
-        The assumption is that adding k_prime parameters will result in "effectively 0" chiSquared cost, and so adding it is justified
-        if the cost of adding these parameters is less than the current chiSquared cost. The validity of this assumption (which
-        depends on an unknown chiSquared value) and the impact of the errors used should be examined more thoroughly in the future."""
+        """Return whether adding k_prime parameters to the given model (ModelCluster) is justified
+        given the current quality of the fit.
+
+        The assumption is that adding k_prime parameters will
+        result in "effectively 0" chiSquared cost, and so adding it is justified if the cost of adding
+        these parameters is less than the current chiSquared cost.
+        The validity of this assumption (which depends on an unknown chiSquared value)
+        and the impact of the errors used should be examined more thoroughly in the future.
+
+        Parameters
+        ----------
+        fit : ModelCluster instance
+            The ModelCluster instance to evaluate.
+        k_prime : int
+            The number of additional parameters under consideration.
+
+        Returns
+        -------
+        bool
+            Whether adding k_prime parameters to the given model is justified.
+        """
         if self.chisq is None:
             self.chisq = self.chi_squared(fit.value(), fit.y_cluster, fit.error_cluster)
 
-        k_actual = fit.model.npars(count_fixed=False) #parameters in current fit
-        k_test = k_actual + k_prime #parameters in prospective fit
-        n = fit.size #the number of data points included in the fit
+        k_actual = fit.model.npars(count_fixed=False)  # parameters in current fit
+        k_test = k_actual + k_prime  # parameters in prospective fit
+        n = fit.size  # the number of data points included in the fit
 
         # If there are too few points to calculate AIC with the requested number of parameters
         # then clearly that increase in parameters is not justified.
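# --- A sketch (not part of this patch) of the arithmetic in growth_justified()
# above for AIC: adding k_prime parameters is worthwhile when the added penalty
# is below the chi-squared cost those parameters are assumed to eliminate.
def growth_justified_aic(chisq, k_prime):
    penalty = 2 * k_prime   # parpenalty(k_actual + k_prime, n) - parpenalty(k_actual, n)
    return penalty < chisq

assert growth_justified_aic(chisq=50.0, k_prime=3)       # 6 < 50
assert not growth_justified_aic(chisq=4.0, k_prime=3)    # 6 >= 4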
if n < self.minpoints(k_test): return False - #assert n >= self.minPoints(kActual) #check that AIC is defined for the actual fit + # assert n >= self.minPoints(kActual) #check that AIC is defined for the actual fit if n < self.minpoints(k_actual): - logger.warn("AIC.growth_justified(): too few data to evaluate quality reliably.") - n=self.minpoints(k_actual) + logger.warning("AIC.growth_justified(): too few data to evaluate quality reliably.") + n = self.minpoints(k_actual) - penalty=self.parpenalty(k_test, n) - self.parpenalty(k_actual, n) + penalty = self.parpenalty(k_test, n) - self.parpenalty(k_actual, n) return penalty < self.chisq @staticmethod def akaikeweights(aics): - """Return sequence of Akaike weights for sequence of AICs""" + """Return sequence of Akaike weights for sequence of AICs + + Parameters + ---------- + aics : array-like + The sequence of AIC instance. + + Returns + ------- + array-like + The sequence of Akaike weights + """ aic_stats = np.array([aic.stat for aic in aics]) aic_min = min(aic_stats) - return np.exp(-(aic_stats-aic_min)/2.) + return np.exp(-(aic_stats - aic_min) / 2.0) @staticmethod def akaikeprobs(aics): - """Return sequence of Akaike probabilities for sequence of AICs""" + """Return sequence of Akaike probabilities for sequence of AICs + + Parameters + ---------- + aics : array-like + The sequence of AIC instance. + + Returns + ------- + array-like + The sequence of Akaike probabilities""" aic_weights = AIC.akaikeweights(aics) - return aic_weights/np.sum(aic_weights) + return aic_weights / np.sum(aic_weights) + # end of class AIC # simple test code -if __name__ == '__main__': +if __name__ == "__main__": - m1=AIC() - m2=AIC() + m1 = AIC() + m2 = AIC() m1.stat = 20 m2.stat = 30 - print m2 > m1 + print(m2 > m1) diff --git a/diffpy/srmise/modelevaluators/aicc.py b/src/diffpy/srmise/modelevaluators/aicc.py similarity index 51% rename from diffpy/srmise/modelevaluators/aicc.py rename to src/diffpy/srmise/modelevaluators/aicc.py index 57f0713..68dd9a8 100644 --- a/diffpy/srmise/modelevaluators/aicc.py +++ b/src/diffpy/srmise/modelevaluators/aicc.py @@ -2,7 +2,8 @@ ############################################################################## # # SrMise by Luke Granlund -# (c) 2014 trustees of the Michigan State University. +# (c) 2014 trustees of the Michigan State University +# (c) 2024 trustees of Columbia University in the City of New York # All rights reserved. # # File coded by: Luke Granlund @@ -15,13 +16,13 @@ import numpy as np -import diffpy.srmise.srmiselog from diffpy.srmise.modelevaluators.base import ModelEvaluator from diffpy.srmise.srmiseerrors import SrMiseModelEvaluatorError logger = logging.getLogger("diffpy.srmise") -class AICc (ModelEvaluator): + +class AICc(ModelEvaluator): """Evaluate and compare models with the AICc statistic. Akaike's Information Criterion w/ 2nd order correction for small sample @@ -50,13 +51,21 @@ def __init__(self): def evaluate(self, fit, count_fixed=False, kshift=0): """Return quality of fit for given ModelCluster using AICc (Akaike's Information Criterion - with 2nd order correction for small sample size). - - Parameters - fit: A ModelCluster - count_fixed: Whether fixed parameters are considered. - kshift: (0) Treat the model has having this many additional - parameters. Negative values also allowed.""" + with 2nd order correction for small sample size). + + Parameters + fit: A ModelCluster + The ModelCluster to evaluate. + count_fixed : bool + Whether fixed parameters are considered. Default is False. 
+ kshift : int + Treat the model has having this many additional + parameters. Negative values also allowed. Default is 0. + + Returns + ------- + float + Quality of AICc""" # Number of parameters. By default, fixed parameters are ignored. k = fit.model.npars(count_fixed=count_fixed) + kshift if k < 0: @@ -67,84 +76,145 @@ def evaluate(self, fit, count_fixed=False, kshift=0): n = fit.size if n < self.minpoints(k): - logger.warn("AICc.evaluate(): too few data to evaluate quality reliably.") + logger.warning("AICc.evaluate(): too few data to evaluate quality reliably.") n = self.minpoints(k) - if self.chisq == None: + if self.chisq is None: self.chisq = self.chi_squared(fit.value(), fit.y_cluster, fit.error_cluster) self.stat = self.chisq + self.parpenalty(k, n) return self.stat - def minpoints(self, npars): - """Calculates the minimum number of points required to make an estimate of a model's quality.""" + """Calculates the minimum number of points required to make an estimate of a model's quality. + + Parameters + ---------- + npars : int + The number of points required to make an estimate of a model's quality. + + Returns + ------- + int + The minimum number of points required to make an estimate of a model's quality. + """ # From the denominator of AICc, it is clear that the first positive finite contribution to # parameter cost is at n>=k+2 return npars + 2 def parpenalty(self, k, n): - """Returns the cost for adding k parameters to the current model cluster.""" + """Returns the cost for adding k parameters to the current model cluster. + + Parameters + ---------- + k : int + The number of parameters to add. - #Weight the penalty for additional parameters. - #If this isn't 1 there had better be a good reason. - fudgefactor = 1. + n : int + The number of data points. - return (2*k+float(2*k*(k+1))/(n-k-1))*fudgefactor + Returns + ------- + float + The cost for adding k parameters to the current model cluster. + """ + + # Weight the penalty for additional parameters. + # If this isn't 1 there had better be a good reason. + fudgefactor = 1.0 + + return (2 * k + float(2 * k * (k + 1)) / (n - k - 1)) * fudgefactor def growth_justified(self, fit, k_prime): - """Returns whether adding k_prime parameters to the given model (ModelCluster) is justified given the current quality of the fit. - The assumption is that adding k_prime parameters will result in "effectively 0" chiSquared cost, and so adding it is justified - if the cost of adding these parameters is less than the current chiSquared cost. The validity of this assumption (which - depends on an unknown chiSquared value) and the impact of the errors used should be examined more thoroughly in the future.""" + """Is adding k_prime parameters to ModelCluster justified given the current quality of the fit. + + The assumption is that adding k_prime parameters will result in "effectively 0" chiSquared cost, + and so adding it is justified if the cost of adding these parameters is less than the current + chiSquared cost. The validity of this assumption (which depends on an unknown chiSquared value) + and the impact of the errors used should be examined more thoroughly in the future. + + Parameters + ---------- + fit : ModelCluster + The ModelCluster to evaluate. + k_prime : int + The prime number of parameters to add. + + Returns + ------- + bool + Whether the current model cluster is justified or not. 
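# --- A worked example (not part of this patch) of the AICc penalty from
# parpenalty() above: 2k + 2k(k+1)/(n-k-1). It approaches the plain AIC
# penalty 2k for large n and diverges as n approaches k+1, which is why
# minpoints() returns k+2.
def aicc_penalty(k, n):
    return 2 * k + float(2 * k * (k + 1)) / (n - k - 1)

assert aicc_penalty(3, 6) == 18.0                 # 6 + 24/2
assert abs(aicc_penalty(3, 1000) - 6.0) < 0.05    # nearly 2k for large n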
+ """ if self.chisq is None: self.chisq = self.chi_squared(fit.value(), fit.y_cluster, fit.error_cluster) - k_actual = fit.model.npars(count_fixed=False) #parameters in current fit - k_test = k_actual + k_prime #parameters in prospective fit - n = fit.size #the number of data points included in the fit + k_actual = fit.model.npars(count_fixed=False) # parameters in current fit + k_test = k_actual + k_prime # parameters in prospective fit + n = fit.size # the number of data points included in the fit # If there are too few points to calculate AICc with the requested number of parameter # then clearly that increase in parameters is not justified. if n < self.minpoints(k_test): return False - #assert n >= self.minPoints(kActual) #check that AICc is defined for the actual fit + # assert n >= self.minPoints(kActual) #check that AICc is defined for the actual fit if n < self.minpoints(k_actual): - logger.warn("AICc.growth_justified(): too few data to evaluate quality reliably.") - n=self.minpoints(k_actual) + logger.warning("AICc.growth_justified(): too few data to evaluate quality reliably.") + n = self.minpoints(k_actual) - penalty=self.parpenalty(k_test, n) - self.parpenalty(k_actual, n) + penalty = self.parpenalty(k_test, n) - self.parpenalty(k_actual, n) return penalty < self.chisq @staticmethod def akaikeweights(aics): - """Return sequence of Akaike weights for sequence of AICs""" + """Return sequence of Akaike weights for sequence of AICs + + Parameters + ---------- + aics : array-like + The squence of AIC instances + + Returns + ------- + array-like + The sequence of Akaike weights + """ aic_stats = np.array([aic.stat for aic in aics]) aic_min = min(aic_stats) - return np.exp(-(aic_stats-aic_min)/2.) + return np.exp(-(aic_stats - aic_min) / 2.0) @staticmethod def akaikeprobs(aics): - """Return sequence of Akaike probabilities for sequence of AICs""" + """Return sequence of Akaike probabilities for sequence of AICs + + Parameters + ---------- + aics : array-like + The squence of AIC instances + + Returns + ------- + array-like + The sequence of Akaike probabilities""" aic_weights = AICc.akaikeweights(aics) - return aic_weights/np.sum(aic_weights) + return aic_weights / np.sum(aic_weights) + # end of class AICc # simple test code -if __name__ == '__main__': +if __name__ == "__main__": - m1=AICc() - m2=AICc() + m1 = AICc() + m2 = AICc() m1.stat = 20 m2.stat = 30 - print m2 > m1 + print(m2 > m1) diff --git a/diffpy/srmise/modelevaluators/base.py b/src/diffpy/srmise/modelevaluators/base.py similarity index 73% rename from diffpy/srmise/modelevaluators/base.py rename to src/diffpy/srmise/modelevaluators/base.py index eb28272..179e082 100644 --- a/diffpy/srmise/modelevaluators/base.py +++ b/src/diffpy/srmise/modelevaluators/base.py @@ -2,7 +2,8 @@ ############################################################################## # # SrMise by Luke Granlund -# (c) 2014 trustees of the Michigan State University. +# (c) 2014 trustees of the Michigan State University +# (c) 2024 trustees of Columbia University in the City of New York # All rights reserved. 
 #
 # File coded by: Luke Granlund
@@ -43,9 +44,6 @@
 
 import numpy as np
 
-import diffpy.srmise.srmiselog
-from diffpy.srmise.srmiseerrors import SrMiseModelEvaluatorError
-
 logger = logging.getLogger("diffpy.srmise")
 
 
@@ -55,8 +53,14 @@ class ModelEvaluator:
     worse models."""
 
     def __init__(self, method, higher_is_better):
-        """method = name of method (string)
-        higher_is_better = boolean
+        """Constructor of ModelEvaluator
+
+        Parameters
+        ----------
+        method : str
+            The name of the method.
+        higher_is_better : bool
+            Whether higher values of the statistic indicate a better model.
         """
         self.method = method
         self.higher_is_better = higher_is_better
@@ -68,11 +72,9 @@ def __lt__(self, other):
         """ """
 
         assert self.method == other.method  # Comparison between same types required
-        assert (
-            self.stat != None and other.stat != None
-        )  # The statistic must already be calculated
+        assert self.stat is not None and other.stat is not None  # The statistic must already be calculated
 
         if self.higher_is_better:
             return self.stat < other.stat
         else:
             return other.stat < self.stat
@@ -81,11 +83,9 @@ def __le__(self, other):
         """ """
 
         assert self.method == other.method  # Comparison between same types required
-        assert (
-            self.stat != None and other.stat != None
-        )  # The statistic must already be calculated
+        assert self.stat is not None and other.stat is not None  # The statistic must already be calculated
 
         if self.higher_is_better:
             return self.stat <= other.stat
         else:
             return other.stat <= self.stat
@@ -94,9 +94,7 @@ def __eq__(self, other):
         """ """
 
         assert self.method == other.method  # Comparison between same types required
-        assert (
-            self.stat != None and other.stat != None
-        )  # The statistic must already be calculated
+        assert self.stat is not None and other.stat is not None  # The statistic must already be calculated
 
         return self.stat == other.stat
 
@@ -104,9 +102,7 @@ def __ne__(self, other):
         """ """
 
         assert self.method == other.method  # Comparison between same types required
-        assert (
-            self.stat != None and other.stat != None
-        )  # The statistic must already be calculated
+        assert self.stat is not None and other.stat is not None  # The statistic must already be calculated
 
         return self.stat != other.stat
 
@@ -114,11 +110,9 @@ def __gt__(self, other):
         """ """
 
         assert self.method == other.method  # Comparison between same types required
-        assert (
-            self.stat != None and other.stat != None
-        )  # The statistic must already be calculated
+        assert self.stat is not None and other.stat is not None  # The statistic must already be calculated
 
         if self.higher_is_better:
             return self.stat > other.stat
         else:
             return other.stat > self.stat
@@ -127,17 +121,24 @@ def __ge__(self, other):
         """ """
 
         assert self.method == other.method  # Comparison between same types required
-        assert (
-            self.stat != None and other.stat != None
-        )  # The statistic must already be calculated
+        assert self.stat is not None and other.stat is not None  # The statistic must already be calculated
 
         if self.higher_is_better:
             return self.stat >= other.stat
         else:
             return other.stat >= self.stat
 
     def chi_squared(self, expected, observed, error):
-        """Calculates chi-squared statistic."""
+        """Calculates chi-squared statistic.
+
+        Parameters
+        ----------
+        expected : float
+            The expected value.
+        observed : float
+            The observed value.
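# --- A worked example (not part of this patch) of the chi_squared() statistic
# above on toy arrays.
import numpy as np

expected = np.array([1.0, 2.0, 3.0])
observed = np.array([1.1, 1.9, 3.2])
error = np.array([0.1, 0.1, 0.1])
chisq = np.sum((expected - observed) ** 2 / error**2)
assert abs(chisq - 6.0) < 1e-9   # (1 + 1 + 4) in units of error**2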
+ error : float + The error statistic.""" self.chisq = np.sum((expected - observed) ** 2 / error**2) return self.chisq diff --git a/src/diffpy/srmise/modelparts.py b/src/diffpy/srmise/modelparts.py new file mode 100644 index 0000000..43a2141 --- /dev/null +++ b/src/diffpy/srmise/modelparts.py @@ -0,0 +1,790 @@ +#!/usr/bin/env python +############################################################################## +# +# SrMise by Luke Granlund +# (c) 2014 trustees of the Michigan State University +# (c) 2024 trustees of Columia University in the City of New York +# All rights reserved. +# +# File coded by: Luke Granlund +# +# See LICENSE.txt for license information. +# +############################################################################## +"""Module for representing instances of mathematical functions. + +Classes +------- +ModelPart: Superclass of Peak and Baseline +ModelParts: Collection (list) of ModelPart instances. +""" + +import logging +from importlib.metadata import version + +import matplotlib.pyplot as plt +import numpy as np + +# Output of scipy.optimize.leastsq for a single parameter changed in scipy 0.8.0 +# Before it returned a scalar, later it returned an array of length 1. +from packaging.version import parse +from scipy.optimize import leastsq + +from diffpy.srmise import srmiselog +from diffpy.srmise.srmiseerrors import SrMiseFitError, SrMiseStaticOwnerError, SrMiseUndefinedCovarianceError + +logger = logging.getLogger("diffpy.srmise") +__spv__ = version("scipy") +__oldleastsqbehavior__ = parse(__spv__) < parse("0.8.0") + + +class ModelParts(list): + """A collection of ModelPart instances. + + Methods + ------- + copy: Return deep copy + fit: Fit to given data + npars: Return total number of parameters + pack_freepars: Update free parameters with values in given sequence + residual: Return residual of model + residual_jacobian: Return jacobian of residual of model + transform: Change format of parameters. + value: Return value of model + unpack_freepars: Return sequence containing value of all free parameters + """ + + def __init__(self, *args, **kwds): + list.__init__(self, *args, **kwds) + + def fit( + self, + r, + y, + y_error, + range=None, + ntrials=0, + cov=None, + cov_format="default_output", + ): + """Chi-square fit of all free parameters to given data. + + There must be at least as many free parameters as data points. + Fitting is performed with the MINPACK leastsq() routine exposed by scipy. + + Parameters + ---------- + r : array-like + The sequence of r values over which to fit + y : array-like + The sequence of y values over which to fit + y_error : array-like + The sequence of uncertainties in y + range : slice object + The slice object specifying region of r and y over which to fit. + Fits over all the data by default. + ntrials : int + The maximum number of function evaluations while fitting. + cov : ModelCovariance instance + The Optional ModelCovariance object preserves covariance information. + cov_format : str + The parameterization to use in cov. + + Returns + ------- + None + """ + freepars = self.unpack_freepars() + if len(freepars) >= len(r): + emsg = ( + "Cannot fit model with " + + str(len(freepars)) + + " free parametersbut only " + + str(len(r)) + + " data points." + ) + raise SrMiseFitError(emsg) + if len(freepars) == 0: + # emsg = "Cannot fit model with no free parameters." 
+ # raise SrMiseFitError(emsg) + return + + if range is None: + range = slice(None) + + args = (r, y, y_error, range) + + if srmiselog.liveplots: + plt.figure(1) + plt.ioff() + plt.subplot(211) + plt.cla() + plt.title("Before") + plt.plot(r, y, label="_nolabel_") + plt.plot( + r, + (y - self.value(r, range=range)) - 1.1 * (max(y) - min(y)), + label="_nolabel_", + ) + for p in self: + plt.plot(r, p.value(r, range=range), label=str(p)) + plt.ion() + + try: + f = leastsq( + self.residual, # minimize this function + freepars, # initial parameters + args=args, # arguments to residual, residual_jacobian + Dfun=self.residual_jacobian, # explicit Jacobian + col_deriv=True, # order of derivatives in Jacobian + full_output=True, + maxfev=ntrials, + ) + except NotImplementedError: + # TODO: Figure out if is worth checking for residual_jacobian + # before leastsq(). This exception will either occur almost never + # or extremely frequently, and the extra evaluations will add up. + logger.info("One or more functions do not define residual_jacobian().") + f = leastsq( + self.residual, # minimize this function + freepars, # initial parameters + args=args, # arguments to residual + col_deriv=True, # order of derivatives in Jacobian + full_output=True, + maxfev=ntrials, + ) + except Exception: + # Sadly, KeyboardInterrupt, etc. is reraised as minpack.error + # Not much I can do about that, though. + import traceback + + emsg = ( + "Unexpected error in modelparts.fit(). Original exception:\n" + + traceback.format_exc() + + "End original exception." + ) + raise SrMiseFitError(emsg) + + result = f[0] + if __oldleastsqbehavior__ and len(freepars) == 1: + # leastsq returns a scalar when there is only one parameter + result = np.array([result]) + + self.pack_freepars(result) + + if srmiselog.liveplots: + plt.draw() + plt.ioff() + plt.figure(1) + plt.subplot(212) + plt.cla() + plt.title("After") + plt.ion() + plt.plot( + r, + y, + r, + (y - self.value(r, range=range)) - 1.1 * (max(y) - min(y)), + *[i for sublist in [[r, p.value(r, range=range)] for p in self] for i in sublist], + ) + plt.draw() + + if srmiselog.wait: + print( + "Press 'Enter' to continue...", + ) + input() + + if f[4] not in (1, 2, 3, 4): + emsg = "Fit did not succeed -- " + str(f[3]) + raise SrMiseFitError(emsg) + + # clean up parameters + for p in self: + p.pars = p.owner().transform_parameters(p.pars, in_format="internal", out_format="internal") + + # Supply estimated covariance matrix if requested. + # The precise relationship between f[1] and estimated covariance matrix is a little unclear from + # the documentation of leastsq. This is the interpretation given by scipy.optimize.curve_fit, + # which is a wrapper around leastsq. + if cov is not None: + pcov = f[1] + fvec = f[2]["fvec"] + dof = len(r) - len(freepars) + cov.setcovariance(self, pcov * np.sum(fvec**2) / dof) + try: + cov.transform(in_format="internal", out_format=cov_format) + except SrMiseUndefinedCovarianceError: + logger.warning("Covariance not defined. Fit may not have converged.") + + return + + # # Notes on the fit f + # f[0] = solution + # f[1] = Uses the fjac and ipvt optional outputs to construct an estimate of the jacobian around the solution. + # None if a singular matrix encountered (indicates very flat curvature in some direction). + # This matrix must be multiplied by the residual variance to get the covariance of the parameter + # estimates - see curve fit. 
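# --- A sketch (not part of this patch) of how the leastsq output is turned
# into a parameter covariance estimate, matching the interpretation used by
# scipy.optimize.curve_fit and the cov handling above: scale cov_x by the
# residual variance sum(fvec**2)/dof. The model here is an illustrative line.
import numpy as np
from scipy.optimize import leastsq

def residual(p, x, y):
    return y - (p[0] * x + p[1])

x = np.linspace(0, 1, 50)
y = 2.0 * x + 1.0 + 0.05 * np.random.randn(len(x))
p, cov_x, infodict, mesg, ier = leastsq(residual, [1.0, 0.0], args=(x, y), full_output=True)
dof = len(x) - len(p)
pcov = cov_x * np.sum(infodict["fvec"] ** 2) / dof   # estimated covariance of p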
+ # f[2] = dictionary{nfev: int, fvec: array(), fjac: array(), ipvt: array(), qtf: array()} + # nfev - The number of function calls made + # fvec - function (residual) evaluated at solution + # fjac - "a permutation of the R matrix of a QR factorization of the final Jacobian." + # ipvt - integer array defining a permutation matrix P such that fjac*P=QR + # qtf - transpose(q)*fvec + # f[3] = message about results of fit + # f[4] = integer flag. Fit was successful on 1,2,3, or 4. Otherwise unsuccessful. + + def npars(self, count_fixed=True): + """Return total number of parameters in all parts. + + Parameters + ---------- + count_fixed : bool + The boolean which determines if fixed parameters are + included in the count. + + Returns + ------- + n : int + The total number of parameters. + """ + n = 0 + for p in self: + n += p.npars(count_fixed=count_fixed) + return n + + def pack_freepars(self, freepars): + """Update parameters with values from sequence of freepars. + + Parameters + ---------- + freepars : array-like + The sequence of free parameters. + + Returns + ------- + None + """ + if np.isnan(freepars).any(): + emsg = "Non-numeric free parameters." + raise ValueError(emsg) + freeidx = 0 + for p in self: + freeidx += p.update(freepars[freeidx:]) + + def residual(self, freepars, r, y_expected, y_error, range=None): + """Calculate residual of all parameters. + + Parameters + ---------- + freepars : array-like + The sequence of free parameters + r : array-like + The input domain + y_expected : array-like + The sequence of expected values + y_error : array-like + The sequence of uncertainties in y-variable + range : slice object + The slice object specifying region of r and y over which to fit. + All the data by default. + + Returns + ------- + array-like + The residual of all parameters. + """ + self.pack_freepars(freepars) + total = self.value(r, range) + try: + if range is None: + range = slice(0, len(r)) + return (y_expected[range] - total[range]) / y_error[range] + except TypeError: + return (y_expected - total) / y_error + + def residual_jacobian(self, freepars, r, y_expected, y_error, range=None): + """Calculate the Jacobian of freepars. + + Parameters + freepars : array-like + The sequence of free parameters + r : array-like + The input domain + y_expected : array-like + The sequence of expected values + y_error : array-like + The sequence of uncertainties in y-variable + range : slice object + The slice object specifying region of r and y over which to fit. + All the data by default. + + Returns + ------- + ndarray + The Jacobian of all parameters. + """ + if len(freepars) == 0: + raise ValueError( + "Argument freepars has length 0. The Jacobian " "is only defined with >=1 free parameters." + ) + + self.pack_freepars(freepars) + tempJac = [] + for p in self: + tempJac[len(tempJac) :] = p.jacobian(r, range) + # Since the residual is (expected - calculated) the jacobian + # of the residual has a minus sign. + jac = -np.array([j for j in tempJac if j is not None]) + try: + if range is None: + range = slice(0, len(r)) + return jac[:, range] / y_error[range] + except TypeError: + return jac / y_error + + def value(self, r, range=None): + """Calculate total value of all parts over range. + + Parameters + ---------- + r : array-like + The input domain + range : slice object + The slice object specifying region of r and y over which to fit. + All the data by default. + + Returns + ------- + total : float + The total value of all slice region of r. 
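# --- A sketch (not part of this patch) of the free-parameter round trip used
# by fit() above: unpack_freepars() flattens every part's free parameters into
# one vector for the optimizer, and pack_freepars() writes the result back in
# the same order.
import numpy as np

pars_a = np.array([3.0, 0.2, 10.0])
free_a = np.array([True, False, True])
pars_b = np.array([3.5, 0.3, 8.0])
free_b = np.array([True, True, True])

flat = np.concatenate([pars_a[free_a], pars_b[free_b]])   # "unpack": 5 free values
assert flat.tolist() == [3.0, 10.0, 3.5, 0.3, 8.0]

moved = flat + 0.1            # pretend the optimizer shifted every free parameter
pars_a[free_a] = moved[:2]    # "pack": each part consumes its slots in sequence
pars_b[free_b] = moved[2:]
assert pars_a[1] == 0.2       # the fixed parameter is untouched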
+ """ + total = r * 0.0 + for p in self: + total += p.value(r, range) + return total + + def unpack_freepars(self): + """Return array of all free parameters.""" + # To check: ravel() sometimes returns a reference and othertimes a copy. + # Do I need to use flatten() instead? + return np.concatenate([p.compress() for p in self]).ravel() + + def covariance(self, format="internal", **kwds): + """Return estimated covariance matrix of the model. + + The covariance matrix may be given in terms of any parameterization + defined by the formats for each individual ModelPart. + + Parameters + ---------- + format : str + The format ("internal" by default) to use for all ModelParts. + This may be overridden for specific peaks as shown below. + + Keywords + -------- + f0 : str + The format of the 0th ModelPart + f1 : str + The format of the 1st ModelPart + etc. + + Returns + ------- + cov : ndarray + The estimated covariance matrix. + """ + formats = [format for p in self] + + for k, v in kwds.items(): + try: + int(k[1:]) + except ValueError: + emsg = "Invalid format keyword '%s'. They must be specified as 'f0', 'f1', etc." % k + raise ValueError(emsg) + + formats[int(k[1:])] = v + + return + + def copy(self): + """Return deep copy of this ModelParts. + + The original and the copy are completely independent, except each + ModelPart and its copy still reference the same owner. + + Returns + ------- + ModelParts + The deep copy of this ModelParts. + """ + return type(self).__call__([p.copy() for p in self]) + + def __str__(self): + """Return string representation of this ModelParts.""" + return "".join([str(p) + "\n" for p in self]) + + def __getitem__(self, index): + """Extends list.__getitem__""" + if isinstance(index, tuple) and len(index) == 2: + start, end = index + return self.__class__(super().__getitem__(slice(start, end))) + else: + return super().__getitem__(index) + + def transform(self, in_format="internal", out_format="internal"): + """Transforms format of parameters in this modelpart. + + Parameters + in_format : str + The format the parameters are already in. + out_format : str + The format the parameters are transformed to. + """ + for p in self: + try: + p.pars = p.owner().transform_parameters(p.pars, in_format, out_format) + except ValueError: + logger.info( + "Invalid parameter transformation: Ignoring %s->%s for function of type %s." + % (in_format, out_format, p.owner().getmodule()) + ) + + +# End of class ModelParts + + +class ModelPart(object): + """Represents a single part (instance of some function) of a model. + + Attributes + ------- + pars : array-like + The array containing the parameters of this model part + free : array-like + The array containing boolean values defining whether the corresponding parameter + is free or not. + removable : bool + The boolean determining whether or not this model part can be + removed during extraction. + static_owner : bool + The boolean determines if owner can be changed with changeowner() + + Methods + ------- + changeowner(new_owner) + Change the owner of the model part instance. + copy() + Return a deep copy of the model part instance. + compress() + Return parameters with non-free parameters removed. + jacobian() + Compute and return the Jacobian matrix for the model part. + getfree(index=None, keyword=None) + Retrieve a free parameter by index or keyword defined by the owner. + npars() + Return the number of parameters in this model part. + owner() + Return the current owner of the model part. 
+ setfree(index=None, value=None, keyword=None, new_value=None) + Set a free parameter by index or keyword defined by the owner. + update(values) + Update free parameters with values from a given sequence. + value() + Compute and return the value of the model part based on its parameters. + writestr() + Generate and return a string representation of the model part. + """ + + def __init__(self, owner, pars, free=None, removable=True, static_owner=False): + """Constructor for instance members. + + Parameters + owner : BaseFunction subclass + The instance of a BaseFunction subclass + pars : array-like + The sequence of parameters which specify the function explicitly + free : array-like + The sequence of Boolean variables. If False, the corresponding + parameter will not be changed. + removable : bool + The boolean determines whether this part can be removed. + static_owner : bool + Whether or not the part can be changed with + changeowner() + + Note that free and removable are not mutually exclusive. If any + pars are not free but removable=True then the part may be removed, but + the held parameters for this part will remain unchanged until then. + """ + self._owner = owner + + if len(pars) != owner.npars: + emsg = "The length of pars must equal the number of parameters " + "specified by the model part owner." + raise ValueError(emsg) + self.pars = np.array(pars[:]) # pars[:] in case pars is a ModelPart + + if free is None: + self.free = np.array([True for p in pars], dtype=bool) + else: + self.free = np.array(free, dtype=bool) + if len(self.free) != owner.npars: + emsg = ( + "The length of free must be equal to the number of " + + "parameters specified by the model part owner." + ) + raise ValueError(emsg) + + self.removable = removable + self.static_owner = static_owner + + def changeowner(self, owner): + """Change the owner of this part. + + Does not change the parameters associated with this model part. Raises + SrMiseStaticOwnerError if this peak has been declared to have a static + owner, or if the number of parameters is incompatible. + + Parameters + ---------- + owner : BaseFunction subclass + The instance of a BaseFunction subclass + + Returns + ------- + None + """ + if self.static_owner and self._owner is not owner: + emsg = "Cannot change owner if static_owner is True." + raise SrMiseStaticOwnerError(emsg) + if self._owner.npars != owner.npars: + emsg = "New owner specifies different number of parameters than " + "original owner." + raise SrMiseStaticOwnerError(emsg) + self._owner = owner + + def compress(self): + """Return part parameters with non-free values removed. + + Returns + ------- + pars : array-like + The compressed parameters of the model part.""" + return self.pars[self.free] + + def jacobian(self, r, range=None): + """Return jacobian of this part over r. + + Parameters + ---------- + r : array-like + The input domain + range : slice object + The slice object specifying region of r and y over which to fit. + All the data by default. + + Returns + ------- + jacobian : array-like + The jacobian of the model part. + """ + return self._owner.jacobian(self, r, range) + + def owner(self): + """Return the BaseFunction subclass instance which owns this part. + + Returns + ------- + BaseFunction subclass + The BaseFunction subclass which owns this part.""" + return self._owner + + def update(self, freepars): + """Sequentially update free parameters from freepars. + + Parameters + ---------- + freepars : array-like + The sequence of new parameter values. 
May contain more + parameters than can actually be updated. + + Returns + ------- + numfree + number of parameters updated from freepars. + """ + numfree = self.npars(count_fixed=False) + if len(freepars) < numfree: + pass # raise "freepars does not have enough elements to + # update every unheld parameter." + # TODO: Check if I need to make copies here, or if references + # to parameters are safe. + self.pars[self.free] = freepars[:numfree] + return numfree + + def value(self, r, range=None): + """Return value of peak over r. + + Parameters + ---------- + r : array-like + The input domain + range : slice object + The slice object specifying region of r and y over which to fit. + All the data by default. + + Returns + ------- + value : array-like + The value of peak over r. + """ + return self._owner.value(self, r, range) + + def copy(self): + """Return a deep copy of this ModelPart. + + The original and the copy are completely independent, except they both + reference the same owner. + + Returns + ------- + ModelPart + A deep copy of this ModelPart. + """ + return type(self).__call__(self._owner, self.pars, self.free, self.removable, self.static_owner) + + def __getitem__(self, key_or_idx): + """Return parameter of peak corresponding with key_or_idx. + + Parameters + ---------- + key_or_idx : Optional[int, slice, key] + The integer index, slice, or key from owner's parameter + dictionary. + + Returns + ------- + pars : array-like + The value of the peak corresponding to key_or_idx. + """ + if key_or_idx in self._owner.parameterdict: + return self.pars[self._owner.parameterdict[key_or_idx]] + else: + return self.pars[key_or_idx] + + def getfree(self, key_or_idx): + """Return value of free corresponding with key_or_idx. + + Parameters + ---------- + key_or_idx : Optional[int, slice object, key] + The integer index, slice, or key from owner's parameter + dictionary. + + Returns + ------- + freepars : array-like + The value of the free corresponding to key_or_idx. + """ + if key_or_idx in self._owner.parameterdict: + return self.free[self._owner.parameterdict[key_or_idx]] + else: + return self.free[key_or_idx] + + def setfree(self, key_or_idx, value): + """Set value of free corresponding with key_or_idx. + + Parameters + ---------- + key_or_idx : Optional[int, slice object, key] + The integer index, slice, or key from owner's parameter + dictionary. + value : bool + The boolean to set in free corresponding to key_or_idx. + + Returns + ------- + None + """ + if key_or_idx in self._owner.parameterdict: + self.free[self._owner.parameterdict[key_or_idx]] = value + else: + self.free[key_or_idx] = value + + def __len__(self): + """Return number of parameters, including any fixed ones.""" + return self._owner.npars + + def npars(self, count_fixed=True): + """Return total number of parameters in all parts. + + Parameters + ---------- + count_fixed : bool + The boolean which determines if fixed parameters are + included in the count. 
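# --- A sketch (not part of this patch) of the update() contract above: a part
# consumes exactly npars(count_fixed=False) values from the front of freepars
# and reports the count, so ModelParts can hand one flat vector to its parts
# in turn.
import numpy as np

pars = np.array([3.0, 0.2, 10.0])
free = np.array([True, False, True])
freepars = np.array([3.1, 10.5, 99.0])   # the trailing value belongs to a later part

numfree = int(np.sum(free))              # npars(count_fixed=False) == 2
pars[free] = freepars[:numfree]
assert pars.tolist() == [3.1, 0.2, 10.5] and numfree == 2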
+ + Returns + ------- + int + The number of parameters in all parts.""" + if count_fixed: + return self._owner.npars + else: + return np.sum(self.free) + + def __str__(self): + """Return string representation of ModelPart parameters.""" + return str(self._owner.transform_parameters(self.pars, in_format="internal", out_format="default_output")) + + def __eq__(self, other): + """ """ + if hasattr(other, "_owner"): + return ( + (self._owner is other._owner) + and np.all(self.pars == other.pars) + and np.all(self.free == other.free) + and self.removable == other.removable + ) + else: + return False + + def __ne__(self, other): + """ """ + return not self == other + + def writestr(self, ownerlist): + """Return string representation of ModelPart. + + The value of owner is determined by its index in ownerlist. + + Parameters + ---------- + ownerlist : array-like + The list of owner functions + + Returns + ------- + datastring + The string representation of ModelPart. + """ + if self._owner not in ownerlist: + emsg = "ownerlist does not contain this ModelPart's owner." + raise ValueError(emsg) + lines = [] + lines.append("owner=%s" % repr(ownerlist.index(self._owner))) + + # Lists/numpy arrays don't give full representation of long lists + lines.append("pars=[%s]" % ", ".join([repr(p) for p in self.pars])) + lines.append("free=[%s]" % ", ".join([repr(f) for f in self.free])) + lines.append("removable=%s" % repr(self.removable)) + lines.append("static_owner=%s" % repr(self.static_owner)) + datastring = "\n".join(lines) + "\n" + return datastring + + +# End of class ModelPart + +# simple test code +if __name__ == "__main__": + pass diff --git a/diffpy/srmise/multimodelselection.py b/src/diffpy/srmise/multimodelselection.py similarity index 62% rename from diffpy/srmise/multimodelselection.py rename to src/diffpy/srmise/multimodelselection.py index 35d9e08..c4c7b74 100644 --- a/diffpy/srmise/multimodelselection.py +++ b/src/diffpy/srmise/multimodelselection.py @@ -2,7 +2,8 @@ ############################################################################## # # SrMise by Luke Granlund -# (c) 2014 trustees of the Michigan State University. +# (c) 2014 trustees of the Michigan State University +# (c) 2024 trustees of Columia University in the City of New York # All rights reserved. # # File coded by: Luke Granlund @@ -17,19 +18,20 @@ import numpy as np from matplotlib import transforms -import diffpy.srmise.srmiselog -from diffpy.srmise import ModelCluster, PeakStability -from diffpy.srmise.modelevaluators.base import ModelEvaluator +from diffpy.srmise.modelcluster import ModelCluster +from diffpy.srmise.peakstability import PeakStability logger = logging.getLogger("diffpy.srmise") + def eatkwds(*args, **kwds): """Convenience function to remove all keywords in args from kwds.""" for k in args: if k in kwds: - print "Keyword %s=%s ignored." %(k, kwds.pop(k)) + print("Keyword %s=%s ignored." % (k, kwds.pop(k))) return kwds + class MultimodelSelection(PeakStability): """Quick and dirty multimodel selection using AIC and its offspring.""" @@ -52,7 +54,7 @@ def __init__(self): self.classweights = {} self.classprobs = {} self.sortedclassprobs = {} - self.sortedclasses = {} # dg->as self.classes, but with model indices sorted by best AIC + self.sortedclasses = {} # dg->as self.classes, but with model indices sorted by best AIC PeakStability.__init__(self) return @@ -60,14 +62,22 @@ def __init__(self): def makeaics(self, dgs, dr, filename=None): """Test quality of each model for all possible uncertainties. 
- Parameters: - dgs - Array of uncertainties over which to test each model. - dr - The sampling rate to use. This determines the actual data to use - for testing, since sometimes the actual result is different than the - nominal value. - filename - Optional file to save pickled results + Parameters + ---------- + dgs : array-like + The array of uncertainties over which to test each model. + dr : float + The sampling rate to use. This determines the actual data to use + for testing, since sometimes the actual result is different than the + nominal value. + filename : str + Optional file to save pickled results + + Returns + ------- + None """ - aics_out = {} # Version of self.aics that holds only the statistic, not the AIC object. + aics_out = {} # Version of self.aics that holds only the statistic, not the AIC object. self.dgs = np.array(dgs) for i, dg in enumerate(self.dgs): self.dgs_idx[dg] = i @@ -77,12 +87,11 @@ def makeaics(self, dgs, dr, filename=None): (r, y, dr, dy) = self.ppe.resampledata(dr) for model_idx in range(len(self.results)): - print "Testing model %s of %s." %(model_idx, len(self.results)) + print("Testing model %s of %s." % (model_idx, len(self.results))) result = self.results[model_idx] em = self.ppe.error_method - # This section dependent on spaghetti code elsewhere in srmise! # Short cut evaluation of AICs which doesn't require calculating chi-square # over and over again. This method assumes that the various uncertainties @@ -95,13 +104,13 @@ def makeaics(self, dgs, dr, filename=None): # modelevaluators subpackage are in need of a rewrite, and so it would be # best to do them all at once. dg0 = self.dgs[0] - mc = ModelCluster(result[1], result[2], r, y, dg0*np.ones(len(r)), None, em, self.ppe.pf) + mc = ModelCluster(result[1], result[2], r, y, dg0 * np.ones(len(r)), None, em, self.ppe.pf) em0 = mc.quality() for dg in self.dgs: em_instance = em() - em_instance.chisq = em0.chisq*(dg0/dg)**2 # rescale chi-square - em_instance.evaluate(mc) # evaluate AIC without recalculating chi-square + em_instance.chisq = em0.chisq * (dg0 / dg) ** 2 # rescale chi-square + em_instance.evaluate(mc) # evaluate AIC without recalculating chi-square self.aics[dg].append(em_instance) aics_out[dg].append(em_instance.stat) @@ -111,22 +120,32 @@ def makeaics(self, dgs, dr, filename=None): if filename is not None: try: - import cPickle as pickle - except: - import pickle - out_s = open(filename, 'wb') + import cPickle as pickle + except ImportError: + import pickle + out_s = open(filename, "wb") pickle.dump(aics_out, out_s) out_s.close() return def loadaics(self, filename): - """Load file containing results of the testall method.""" + """Load file containing results of the testall method. + + Parameters + ---------- + filename : str + Filename to load. 
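# --- A worked example (not part of this patch) of the rescaling trick behind
# makeaics() above: with a uniform uncertainty dg, chisq = sum((y - f)**2) / dg**2,
# so the statistic at any dg follows from one reference evaluation at dg0 via
# the factor (dg0/dg)**2, with no need to recompute chi-squared.
import numpy as np

y = np.array([1.0, 2.0, 3.0])
f = np.array([1.1, 2.1, 2.9])
dg0, dg = 0.1, 0.2
chisq0 = np.sum((y - f) ** 2 / dg0**2)
chisq = np.sum((y - f) ** 2 / dg**2)
assert abs(chisq - chisq0 * (dg0 / dg) ** 2) < 1e-12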
+ + Returns + ------- + None + """ try: - import cPickle as pickle - except: - import pickle - in_s = open(filename, 'rb') + import cPickle as pickle + except ImportError: + import pickle + in_s = open(filename, "rb") aics_in = pickle.load(in_s) in_s.close() @@ -152,6 +171,7 @@ def loadaics(self, filename): return def makeaicweights(self): + """Make weights for the aic Modelevaluators.""" self.aicweights = {} em = self.ppe.error_method @@ -159,6 +179,7 @@ def makeaicweights(self): self.aicweights[dg] = em.akaikeweights(self.aics[dg]) def makeaicprobs(self): + """Make probabilities for the sequence of AICs.""" self.aicprobs = {} em = self.ppe.error_method @@ -166,29 +187,34 @@ def makeaicprobs(self): self.aicprobs[dg] = em.akaikeprobs(self.aics[dg]) def makesortedprobs(self): + """Make probabilities for the sequence of AICs in a sorted order.""" self.sortedprobs = {} for dg in self.dgs: self.sortedprobs[dg] = np.argsort(self.aicprobs[dg]).tolist() - def animate_probs(self, step=False, duration=0., **kwds): + def animate_probs(self, step=False, duration=0.0, **kwds): """Show animation of extracted peaks from first to last. - Parameters: - step - Require keypress to show next plot - duration - Minimum time in seconds to complete animation. Default 0. + Parameters + ---------- + step : bool + Require keypress to show next plot, default is False. + duration : float + Minimum time in seconds to complete animation. Default is 0. Keywords passed to pyplot.plot()""" if duration > 0: import time - sleeptime = duration/len(self.dgs) + + sleeptime = duration / len(self.dgs) plt.ion() plt.subplot(211) best_idx = self.sortedprobs[self.dgs[0]][-1] - line, = plt.plot(self.dgs, self.aicprobs[self.dgs[0]]) + (line,) = plt.plot(self.dgs, self.aicprobs[self.dgs[0]]) vline = plt.axvline(self.dgs[0]) - dot, = plt.plot(self.dgs[best_idx],self.aicprobs[self.dgs[0]][best_idx],'ro') + (dot,) = plt.plot(self.dgs[best_idx], self.aicprobs[self.dgs[0]][best_idx], "ro") plt.subplot(212) self.setcurrent(best_idx) @@ -208,37 +234,46 @@ def animate_probs(self, step=False, duration=0., **kwds): plt.ion() plt.draw() if step: - raw_input() + input() if duration > 0: time.sleep(sleeptime) - def animate_classprobs(self, step=False, duration=0., **kwds): + def animate_classprobs(self, step=False, duration=0.0, **kwds): """Show animation of extracted peaks from first to last. - Parameters: - step - Require keypress to show next plot - duration - Minimum time in seconds to complete animation. Default 0. + Parameters + ---------- + step : bool + Require keypress to show next plot, default is False. + duration : float + Minimum time in seconds to complete animation. Default is 0. 
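# ---- Editor's aside: illustrative sketch, not part of the patch. ----
# makeaicweights()/makeaicprobs() above delegate to the ModelEvaluator; the
# underlying arithmetic is the standard Akaike-weight formula,
# w_i ~ exp(-(AIC_i - AIC_min)/2), normalized to sum to one. A standalone
# demonstration with fabricated AIC values:
import numpy as np

aics = np.array([102.3, 100.0, 105.7])  # hypothetical AIC statistics
delta = aics - aics.min()               # differences from the best model
weights = np.exp(-delta / 2.0)
probs = weights / weights.sum()         # Akaike probabilities, sum to 1
# ---- end editor's aside ----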
Keywords passed to pyplot.plot()""" if duration > 0: import time - sleeptime = duration/len(self.dgs) + + sleeptime = duration / len(self.dgs) plt.ion() ax1 = plt.subplot(211) bestclass_idx = self.sortedclassprobs[self.dgs[0]][-1] best_idx = self.sortedclasses[self.dgs[0]][bestclass_idx][-1] - arrow_left = len(self.classes)-1 - arrow_right = arrow_left + .05*arrow_left - line, = plt.plot(range(len(self.classes)), self.classprobs[self.dgs[0]]) - dot, = plt.plot(self.dgs[best_idx],self.classprobs[self.dgs[0]][bestclass_idx],'ro') - plt.axvline(arrow_left, color='k') + arrow_left = len(self.classes) - 1 + arrow_right = arrow_left + 0.05 * arrow_left + (line,) = plt.plot(range(len(self.classes)), self.classprobs[self.dgs[0]]) + (dot,) = plt.plot(self.dgs[best_idx], self.classprobs[self.dgs[0]][bestclass_idx], "ro") + plt.axvline(arrow_left, color="k") ax2 = ax1.twinx() - ax2.set_ylim(self.dgs[0],self.dgs[-1]) + ax2.set_ylim(self.dgs[0], self.dgs[-1]) ax2.set_ylabel("dg") ax1.set_xlim(right=arrow_right) ax2.set_xlim(right=arrow_right) - dgarrow = ax2.annotate("",(arrow_right, self.dgs[0]), (arrow_left, self.dgs[0]), arrowprops=dict(arrowstyle="-|>")) + dgarrow = ax2.annotate( + "", + (arrow_right, self.dgs[0]), + (arrow_left, self.dgs[0]), + arrowprops=dict(arrowstyle="-|>"), + ) plt.subplot(212) self.setcurrent(best_idx) @@ -247,7 +282,7 @@ def animate_classprobs(self, step=False, duration=0., **kwds): minval = np.min(val[1::2]) [r, res] = tempcluster.plottable_residual() plt.plot(*val) - plt.plot(r, minval-np.max(res)+res) + plt.plot(r, minval - np.max(res) + res) for dg in self.dgs[1:]: plt.ioff() line.set_ydata(self.classprobs[dg]) @@ -263,13 +298,13 @@ def animate_classprobs(self, step=False, duration=0., **kwds): minval = np.min(val[1::2]) [r, res] = tempcluster.plottable_residual() plt.plot(*val) - plt.plot(r, minval-np.max(res)+res) + plt.plot(r, minval - np.max(res) + res) dot.set_xdata(bestclass_idx) dot.set_ydata(self.classprobs[dg][bestclass_idx]) plt.ion() plt.draw() if step: - raw_input() + input() if duration > 0: time.sleep(sleeptime) @@ -286,25 +321,31 @@ def classify(self, r, tolerance=0.05): 2) The exemplar (first model) of each class isn't the best representative 3) The parameters vary so smoothly there aren't actually definite classes - Parameters: - r - The r values over which to evaluate the models - tolerance - The fraction below which models are considered the same + Parameters + ---------- + r : array-like + The r values over which to evaluate the models + tolerance : float + The fraction below which models are considered the same + Returns + ------- + None """ self.classes = [] self.classes_idx = {} self.class_tolerance = None - classes = [] # each element is a list of the models (result indices) in the class - classes_idx = {} # given an integer corresponding to a model, return its class - epsqval = {} # holds the squared value of each class' exemplar peaks - ebsqval = {} # holds the squared value of each class exemplar baselines + classes = [] # each element is a list of the models (result indices) in the class + classes_idx = {} # given an integer corresponding to a model, return its class + epsqval = {} # holds the squared value of each class' exemplar peaks + ebsqval = {} # holds the squared value of each class exemplar baselines for i in range(len(self.results)): peaks = self.results[i][1] baseline = self.results[i][2] - bsqval = baseline.value(r)**2 - psqval = [p.value(r)**2 for p in peaks] + bsqval = baseline.value(r) ** 2 + psqval = [p.value(r) ** 2 for p in 
peaks] added_to_class = False for c in range(len(classes)): @@ -312,17 +353,17 @@ def classify(self, r, tolerance=0.05): exemplar_baseline = self.results[classes[c][0]][2] # Check baseline type and number of parameters - if type(baseline) != type(exemplar_baseline): + if type(baseline) is not type(exemplar_baseline): continue if baseline.npars() != exemplar_baseline.npars(): continue # check peak types and number of parameters - badpeak=False + badpeak = False if len(peaks) != len(exemplar_peaks): continue - for p, ep in zip(peaks,exemplar_peaks): - if type(p) != type(ep): + for p, ep in zip(peaks, exemplar_peaks): + if type(p) is not type(ep): badpeak = True break if p.npars() != ep.npars(): @@ -332,20 +373,19 @@ def classify(self, r, tolerance=0.05): continue # check peak values - current_psqval = [] - for p, ep in zip(psqval,epsqval[c]): - basediff = np.abs(np.sum(p-ep)) - #if basediff > tolerance*np.sum(ep): - if basediff > tolerance*np.sum(ep) or basediff > tolerance*np.sum(p): + for p, ep in zip(psqval, epsqval[c]): + basediff = np.abs(np.sum(p - ep)) + # if basediff > tolerance*np.sum(ep): + if basediff > tolerance * np.sum(ep) or basediff > tolerance * np.sum(p): badpeak = True break if badpeak: continue # check baseline values - basediff = np.abs(np.sum(bsqval-ebsqval[c])) - #if basediff > tolerance*np.sum(ebsqval[c]): - if basediff > tolerance*np.sum(ebsqval[c]) or basediff > tolerance*np.sum(bsqval): + basediff = np.abs(np.sum(bsqval - ebsqval[c])) + # if basediff > tolerance*np.sum(ebsqval[c]): + if basediff > tolerance * np.sum(ebsqval[c]) or basediff > tolerance * np.sum(bsqval): continue # that's all the checks, add to current class @@ -357,7 +397,7 @@ def classify(self, r, tolerance=0.05): if added_to_class is False: # make a new class with the current model as exemplar classes.append([i]) - classnum = len(classes)-1 + classnum = len(classes) - 1 classes_idx[i] = classnum epsqval[classnum] = psqval ebsqval[classnum] = bsqval @@ -374,7 +414,6 @@ def classify(self, r, tolerance=0.05): def makesortedclasses(self): self.sortedclasses = {} - em = self.ppe.error_method for dg in self.dgs: bestinclass = [] @@ -388,6 +427,7 @@ def makesortedclasses(self): self.sortedclasses[dg] = bestinclass def makeclassweights(self): + """Make weights for all classes.""" self.classweights = {} em = self.ppe.error_method @@ -396,6 +436,7 @@ def makeclassweights(self): self.classweights[dg] = em.akaikeweights([self.aics[dg][b] for b in bestinclass]) def makeclassprobs(self): + """Make probabilities for all classes.""" self.classprobs = {} em = self.ppe.error_method @@ -404,17 +445,38 @@ def makeclassprobs(self): self.classprobs[dg] = em.akaikeprobs([self.aics[dg][b] for b in bestinclass]) def makesortedclassprobs(self): + """Make probabilities for all classes in sorted order.""" self.sortedclassprobs = {} for dg in self.dgs: self.sortedclassprobs[dg] = np.argsort(self.classprobs[dg]).tolist() def dg_key(self, dg_in): - """Return the dg value usable as a key nearest to dg_in.""" - idx = (np.abs(self.dgs-dg_in)).argmin() + """Return the dg value usable as a key nearest to dg_in. + + Parameters + ---------- + dg_in : The uncertainties of the model + + Returns + ------- + float + The dg value usable as a key nearest to dg_in.""" + idx = (np.abs(self.dgs - dg_in)).argmin() return self.dgs[idx] def bestclasses(self, dgs=None): + """Return the best classes for all models. 
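# ---- Editor's aside: illustrative sketch, not part of the patch. ----
# The membership test in classify() above treats two models as equivalent when
# the integrated difference of their squared curves is small relative to both
# totals. A standalone version of that criterion (names are hypothetical):
import numpy as np

def same_within_tolerance(psq, epsq, tolerance=0.05):
    basediff = np.abs(np.sum(psq - epsq))
    return basediff <= tolerance * np.sum(epsq) and basediff <= tolerance * np.sum(psq)

curve = np.array([1.0, 4.0, 1.0])
print(same_within_tolerance(curve, 1.02 * curve))  # True: ~2% difference
print(same_within_tolerance(curve, 2.00 * curve))  # False: well outside 5%
# ---- end editor's aside ----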
+ + Parameters + ---------- + dgs : array-like, optional + The uncertainties of the models, by default None + + Returns + ------- + array-like + The best classes for all models.""" if dgs is None: dgs = self.dgs best = [] @@ -423,6 +485,18 @@ def bestclasses(self, dgs=None): return np.unique(best) def bestmodels(self, dgs=None): + """Return the best models for all models. + + Parameters + ---------- + dgs : array-like, optional + The uncertainties of the models, by default None + + Returns + ------- + array-like + Sequence of best model + """ if dgs is None: dgs = self.dgs best = [] @@ -432,6 +506,19 @@ def bestmodels(self, dgs=None): return np.unique(best) def classbestdgs(self, cls, dgs=None): + """Return the best uncertainties for the models. + + Parameters + ---------- + cls : ModelEvaluator Class + Override corder with a specific class index, or None to ignore classes entirely. + dgs : array-like, optional + The uncertainties of the models, by default None + + Returns + ------- + array-like + Sequence of best uncertainties for the models.""" if dgs is None: dgs = self.dgs bestdgs = [] @@ -441,7 +528,20 @@ def classbestdgs(self, cls, dgs=None): return bestdgs def modelbestdgs(self, model, dgs=None): - """Return uncertainties where given model has greatest Akaike probability.""" + """Return uncertainties where given model has greatest Akaike probability. + + Parameters + ---------- + model : ModelEvaluator Class + The model evaluator class to use + dgs : array-like, optional + The uncertainties of the models, by default None + + Returns + ------- + array-like + The uncertainties where given model has greatest Akaike probability + """ if dgs is None: dgs = self.dgs bestdgs = [] @@ -455,17 +555,20 @@ def modelbestdgs(self, model, dgs=None): def plot3dclassprobs(self, **kwds): """Return 3D plot of class probabilities. - Keywords: + Keywords + -------- dGs - Sequence of dG values to plot. Default is all values. highlight - Sequence of dG values to highlight on plot. Default is []. classes - Sequence of indices of classes to plot. Default is all classes. - probfilter - [float1, float2]. Only show classes with maximum probability in given range. Default is [0., 1.] + probfilter - [float1, float2]. Only show classes with maximum probability in given range. + Default is [0., 1.] class_size - Report the size of each class as a "number" or "fraction". Default is "number". norm - A colors normalization for displaying number/fraction of models in class. Default is "auto". If equal to "full" determined by the total number of models. If equal to "auto" determined by the number of models in displayed classes. - cmap - A colormap or registered colormap name. Default is cm.jet. If class_size is "number" and norm is either "auto" - or "full" the map is converted to an indexed colormap. + cmap - A colormap or registered colormap name. Default is cm.jet. + If class_size is "number" and norm is either "auto" + or "full" the map is converted to an indexed colormap. highlight_cmap - A colormap or registered colormap name for coloring highlights. Default is cm.gray. title - True, False, or a string. Defaults to True, which displays some basic information about the graph. p_alpha - Probability graph alpha. (Colorbar remains opaque). Default is 0.7. @@ -478,7 +581,9 @@ def plot3dclassprobs(self, **kwds): All other keywords are passed to the colorbar. 
- Returns a dictionary containing the following figure elements: + Returns + ------- + a dictionary containing the following figure elements: "fig" - The figure "axis" - The image axis "cbaxis" - The colorbar axis, if it exists. @@ -486,27 +591,26 @@ def plot3dclassprobs(self, **kwds): from matplotlib import cm, colorbar, colors from matplotlib.collections import PolyCollection - from mpl_toolkits.mplot3d import Axes3D fig = kwds.pop("figure", plt.gcf()) - ax = fig.add_subplot(kwds.pop("subplot",111), projection='3d') + ax = fig.add_subplot(kwds.pop("subplot", 111), projection="3d") - cbkwds = kwds.copy() + kwds.copy() # Resolve keywords (title resolved later) dGs = kwds.pop("dGs", self.dgs) highlight = kwds.pop("highlight", []) classes = kwds.pop("classes", range(len(self.classes))) - probfilter = kwds.pop("probfilter", [0.,1.]) + probfilter = kwds.pop("probfilter", [0.0, 1.0]) class_size = kwds.pop("class_size", "number") norm = kwds.pop("norm", "auto") - cmap = kwds.pop("cmap", cm.jet) - highlight_cmap = kwds.pop("highlight_cmap", cm.gray) + cmap = kwds.pop("cmap", cm.get_cmap("jet")) + highlight_cmap = kwds.pop("highlight_cmap", cm.get_cmap("gray")) title = kwds.pop("title", True) p_alpha = kwds.pop("p_alpha", 0.7) - scale = kwds.pop("scale", 1.) + scale = kwds.pop("scale", 1.0) - xs = dGs*scale + xs = dGs * scale verts = [] zs = [] zlabels = [] @@ -515,117 +619,134 @@ def plot3dclassprobs(self, **kwds): maxys = np.max(ys) if maxys >= probfilter[0] and maxys <= probfilter[1]: - p0, p1 = ((xs[0], 0), (xs[-1],0)) # points to close the vertices - verts.append(np.concatenate([[p0], zip(xs,ys), [p1], [p0]])) + p0, p1 = ((xs[0], 0), (xs[-1], 0)) # points to close the vertices + verts.append(np.concatenate([[p0], zip(xs, ys), [p1], [p0]])) zlabels.append(i) - ### Define face colors + # Define face colors fc = np.array([len(self.classes[z]) for z in zlabels]) - if class_size is "fraction": - fc = fc/float(len(self.results)) + if class_size == "fraction": + fc = fc / float(len(self.results)) # Index the colormap if necessary - if class_size is "number": - if norm is "auto": - indexedcolors = cmap(np.linspace(0., 1., np.max(fc))) + if class_size == "number": + if norm == "auto": + indexedcolors = cmap(np.linspace(0.0, 1.0, np.max(fc))) cmap = colors.ListedColormap(indexedcolors) - elif norm is "full": - indexedcolors = cmap(np.linspace(0., 1., len(self.results))) + elif norm == "full": + indexedcolors = cmap(np.linspace(0.0, 1.0, len(self.results))) cmap = colors.ListedColormap(indexedcolors) # A user-specified norm cannot be used to index a colormap. # Create proper norms for "auto" and "full" types. - if norm is "auto": - if class_size is "number": + if norm == "auto": + if class_size == "number": mic = np.min(fc) mac = np.max(fc) nc = mac - mic + 1 - norm = colors.BoundaryNorm(np.linspace(mic, mac+1, nc+1), nc) - if class_size is "fraction": + norm = colors.BoundaryNorm(np.linspace(mic, mac + 1, nc + 1), nc) + if class_size == "fraction": norm = colors.Normalize() norm.autoscale(fc) - elif norm is "full": + elif norm == "full": mcolor = len(self.results) - if class_size is "number": - norm = colors.BoundaryNorm(np.linspace(0, mcolor+1, mcolor+2), mcolor+1) - if class_size is "fraction": - norm = colors.Normalize(0., 1.) 
+ if class_size == "number": + norm = colors.BoundaryNorm(np.linspace(0, mcolor + 1, mcolor + 2), mcolor + 1) + if class_size == "fraction": + norm = colors.Normalize(0.0, 1.0) zs = np.arange(len(zlabels)) poly = PolyCollection(verts, facecolors=cmap(norm(fc)), closed=False) poly.set_alpha(p_alpha) - cax = ax.add_collection3d(poly, zs=zs, zdir='y') + ax.add_collection3d(poly, zs=zs, zdir="y") # Highlight values of interest color_idx = np.linspace(0, 1, len(highlight)) for dG, ci in zip(highlight, color_idx): for z_logical, z_plot in zip(zlabels, zs): - ax.plot([dG, dG], [z_plot, z_plot], [0, self.classprobs[dG][z_logical]], color=highlight_cmap(ci), alpha=p_alpha) - - ax.set_xlabel('dG') - ax.set_xlim3d(dGs[0]*scale, dGs[-1]*scale) - ax.set_ylabel('Class') + ax.plot( + [dG, dG], + [z_plot, z_plot], + [0, self.classprobs[dG][z_logical]], + color=highlight_cmap(ci), + alpha=p_alpha, + ) + + ax.set_xlabel("dG") + ax.set_xlim3d(dGs[0] * scale, dGs[-1] * scale) + ax.set_ylabel("Class") ax.set_ylim3d(zs[0], zs[-1]) ax.set_yticks(zs) ax.set_yticklabels([str(z) for z in zlabels]) - ax.set_zlabel('Akaike probability') + ax.set_zlabel("Akaike probability") ax.set_zlim3d(0, 1) if title is True: - title = "Class probabilities\n\ + title = ( + "Class probabilities\n\ Max probabilities in %s\n\ - %i/%i classes with %i/%i models displayed"\ - %(probfilter, - len(zs), len(self.classes), - np.sum([len(self.classes[z]) for z in zlabels]), len(self.results) ) + %i/%i classes with %i/%i models displayed" + % ( + probfilter, + len(zs), + len(self.classes), + np.sum([len(self.classes[z]) for z in zlabels]), + len(self.results), + ) + ) if title is not False: - figtitle = fig.suptitle(title) + fig.suptitle(title) # Add colorbar if "cbpos" in kwds: cbpos = kwds.pop("cbpos") - aspect = cbpos[3]/cbpos[2] - plt.tight_layout() # do it before cbaxis, so colorbar is ignored. + plt.tight_layout() # do it before cbaxis, so colorbar is ignored. transAtoF = ax.transAxes + fig.transFigure.inverted() rect = transforms.Bbox.from_bounds(*cbpos).transformed(transAtoF).bounds cbaxis = fig.add_axes(rect) # Remove all colorbar.make_axes keywords except orientation - kwds = eatkwds("fraction", "pad", "shrink", "aspect", - "anchor", "panchor", **kwds) + kwds = eatkwds("fraction", "pad", "shrink", "aspect", "anchor", "panchor", **kwds) else: kwds.setdefault("shrink", 0.75) # In matplotlib 1.1.0 make_axes_gridspec ignores anchor and panchor keywords. # Eat these keywords for now. kwds = eatkwds("anchor", "panchor", **kwds) - cbaxis, kwds = colorbar.make_axes_gridspec(ax, **kwds) # gridspec allows tight_layout - plt.tight_layout() # do it after cbaxis, so colorbar isn't ignored + cbaxis, kwds = colorbar.make_axes_gridspec(ax, **kwds) # gridspec allows tight_layout + plt.tight_layout() # do it after cbaxis, so colorbar isn't ignored cb = colorbar.ColorbarBase(cbaxis, cmap=cmap, norm=norm, **kwds) - if class_size is "number": + if class_size == "number": cb.set_label("Models in class") - elif class_size is "fraction": + elif class_size == "fraction": cb.set_label("Fraction of models in class") - - return {"fig":fig, "axis":ax, "cb":cb, "cbaxis": cbaxis} + return {"fig": fig, "axis": ax, "cb": cb, "cbaxis": cbaxis} def get_model(self, dG, **kwds): """Return index of best model of best class at given dG. 
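# ---- Editor's aside: illustrative sketch, not part of the patch. ----
# How the indexed-colormap branch above maps integer class sizes to discrete
# colors: one color per possible count, with BoundaryNorm picking the bin.
# Standalone demonstration with fabricated counts, mirroring the fc/mic/mac/nc
# names used in plot3dclassprobs():
import numpy as np
from matplotlib import cm, colors

fc = np.array([1, 3, 7])  # hypothetical models-per-class counts
cmap = colors.ListedColormap(cm.get_cmap("jet")(np.linspace(0.0, 1.0, np.max(fc))))
mic, mac = np.min(fc), np.max(fc)
nc = mac - mic + 1
norm = colors.BoundaryNorm(np.linspace(mic, mac + 1, nc + 1), nc)
facecolors = cmap(norm(fc))  # one RGBA row per class
# ---- end editor's aside ----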
- Parameters: - dG - The uncertainty used to calculate probabilities - + Parameters + ---------- + dG : array-like + The uncertainty used to calculate probabilities - Keywords: + Keywords + -------- corder - Which class to get based on AIC. Ordered from best to worst from 0 (the default). morder - Which model to get based on AIC. Ordered from best to worst from 0 (the default). Returns a model from a class, or from the collection of all models if classes are ignored. - cls - Override corder with a specific class index, or None to ignore classes entirely.""" + cls - Override corder with a specific class index, or None to ignore classes entirely. + + Returns + ------- + int + Index of best model of best class at given dG. + """ corder = kwds.pop("corder", 0) morder = kwds.pop("morder", 0) if "cls" in kwds: @@ -634,34 +755,50 @@ def get_model(self, dG, **kwds): cls = self.get_class(dG, corder=corder) if cls is None: - return self.sortedprobs[dG][-1-morder] + return self.sortedprobs[dG][-1 - morder] else: - return self.sortedclasses[dG][cls][-1-morder] + return self.sortedclasses[dG][cls][-1 - morder] def get_class(self, dG, **kwds): """Return index of best class at given dG. - Parameters: - dG - The uncertainty used to calculate probabilities + Parameters + ---------- + dG : array-like + The uncertainty used to calculate probabilities + Keywords + -------- + corder - Which class to get based on AIC. Ordered from best to worst from 0 (the default). - Keywords: - corder - Which class to get based on AIC. Ordered from best to worst from 0 (the default).""" + Returns + ------- + int + Index of best model of best class at given dG. + """ corder = kwds.pop("corder", 0) - return self.sortedclassprobs[dG][-1-corder] # index of corderth best class + return self.sortedclassprobs[dG][-1 - corder] # index of corderth best class def get_prob(self, dG, **kwds): """Return Akaike probability of best model of best class at given dG. - Parameters: - dG - The uncertainty used to calculate probabilities - + Parameters + ---------- + dG : array-like + The uncertainty used to calculate probabilities - Keywords: + Keywords + -------- corder - Which class to get based on AIC. Ordered from best to worst from 0 (the default). morder - Which model to get based on AIC. Ordered from best to worst from 0 (the default). Returns a model from a class, or from the collection of all models if classes are ignored. - cls - Override corder with a specific class index, or None to ignore classes entirely.""" + cls - Override corder with a specific class index, or None to ignore classes entirely. + + Returns + ------- + array-like + The sequence of Akaike probability of best model of best class at given dG. + """ idx = self.get_model(dG, **kwds) if "cls" in kwds and kwds["cls"] is None: return self.aicprobs[dG][idx] @@ -672,15 +809,23 @@ def get_prob(self, dG, **kwds): def get_nfree(self, dG, **kwds): """Return number of free parameters of best model of best class at given dG. - Parameters: - dG - The uncertainty used to calculate probabilities - + Parameters + ---------- + dG : array-like + The uncertainty used to calculate probabilities - Keywords: + Keywords + -------- corder - Which class to get based on AIC. Ordered from best to worst from 0 (the default). morder - Which model to get based on AIC. Ordered from best to worst from 0 (the default). Returns a model from a class, or from the collection of all models if classes are ignored. 
-        cls - Override corder with a specific class index, or None to ignore classes entirely."""
+        cls - Override corder with a specific class index, or None to ignore classes entirely.
+
+        Returns
+        -------
+        int
+            Number of free parameters of best model of best class at given dG.
+        """
         idx = self.get_model(dG, **kwds)
         model = self.results[idx][1]
         baseline = self.results[idx][2]
@@ -689,38 +834,57 @@ def get_aic(self, dG, **kwds):
         """Return number of free parameters of best model of best class at given dG.
 
-        Parameters:
-        dG - The uncertainty used to calculate probabilities
-
+        Parameters
+        ----------
+        dG : float
+            The uncertainty used to calculate probabilities
 
-        Keywords:
+        Keywords
+        --------
         corder - Which class to get based on AIC. Ordered from best to worst from 0 (the default).
         morder - Which model to get based on AIC. Ordered from best to worst from 0 (the default).
                  Returns a model from a class, or from the collection of all models if classes are ignored.
-        cls - Override corder with a specific class index, or None to ignore classes entirely."""
+        cls - Override corder with a specific class index, or None to ignore classes entirely.
+
+        Returns
+        -------
+        float
+            The AIC statistic of the best model of the best class at given dG.
+        """
         idx = self.get_model(dG, **kwds)
         return self.aics[dG][idx].stat
 
     def get(self, dG, *args, **kwds):
         """Return tuple of values corresponding to string arguments for best model of best class at given dG.
 
-        Parameters:
-        dG - The uncertainty used to calculate probabilities
+        Parameters
+        ----------
+        dG : float
+            The uncertainty used to calculate probabilities
 
         Permissible arguments: "aic", "class", "dG", "model", "nfree", "prob"
         ("dG" simply returns the provided dG value)
 
-        Keywords:
+        Keywords
+        --------
         corder - Which class to get based on AIC. Ordered from best to worst from 0 (the default).
         morder - Which model to get based on AIC. Ordered from best to worst from 0 (the default).
                  Returns a model from a class, or from the collection of all models if classes are ignored.
-        cls - Override corder with a specific class index, or None to ignore classes entirely."""
+        cls - Override corder with a specific class index, or None to ignore classes entirely.
+
+        Returns
+        -------
+        tuple
+            The values corresponding to string arguments for best model of best class at given dG.
+        """
-        fdict = {"aic": self.get_aic,
-                 "class": self.get_class,
-                 "dg": lambda x: x,
-                 "model": self.get_model,
-                 "nfree": self.get_nfree,
-                 "prob": self.get_prob}
+        fdict = {
+            "aic": self.get_aic,
+            "class": self.get_class,
+            "dg": lambda x: x,
+            "model": self.get_model,
+            "nfree": self.get_nfree,
+            "prob": self.get_prob,
+        }
         values = []
         for a in args:
             values.append(fdict[a].__call__(dG, **kwds))
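# ---- Editor's aside: illustrative usage sketch, not part of the patch. ----
# How the accessors documented above compose. `ms` is a hypothetical, already
# populated MultimodelSelection instance (peaks extracted, makeaics(),
# classify() and the make*probs() helpers already run):
dg = ms.dg_key(0.05)                        # nearest tested uncertainty
aic, cls, model = ms.get(dg, "aic", "class", "model")
nfree = ms.get_nfree(dg, corder=0)          # best class, its best model
# ---- end editor's aside ----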
@@ -730,6 +894,16 @@ def maxprobdG_byclass(self, model):
         """Return the post-hoc dG for which the given model's Akaike probability is maximized.
 
         Each model is mapped to its class' best member.
+
+        Parameters
+        ----------
+        model : int
+            Index of the model for which to find the post-hoc dG.
+
+        Returns
+        -------
+        float
+            The post-hoc dG at which the given model's Akaike probability is maximized.
         """
         cls = self.classes_idx[model]
         probs = [self.classprobs[dg][cls] for dg in self.dgs]
@@ -738,7 +912,18 @@ def maxprobdG_bymodel(self, model):
         """Return the post-hoc dG for which the given model's Akaike probability is maximized.
 
-        Classes are not considered."""
+        Classes are not considered.
+
+        Parameters
+        ----------
+        model : int
+            Index of the model for which to find the post-hoc dG.
+
+        Returns
+        -------
+        float
+            The post-hoc dG at which the given model's Akaike probability is maximized.
+        """
         probs = [self.aicprobs[dg][model] for dg in self.dgs]
         prob_idx = np.argmax(probs)
         return self.dgs[prob_idx]
@@ -746,13 +931,33 @@ def maxprobmodel_byclass(self, dG):
         """Calculate the model which maximizes probability at given dG.
 
-        The best class is mapped to its best model."""
+        The best class is mapped to its best model.
+
+        Parameters
+        ----------
+        dG : float
+            The uncertainty used to calculate probabilities
+
+        Returns
+        -------
+        int
+            Index of the model, mapped through its class, which maximizes probability at given dG.
+        """
         cls = self.sortedclassprobs[dG][-1]
         m = self.sortedclasses[dG][cls][-1]
         return m
 
     def maxprobmodel_bymodel(self, dG):
         """Return the model which maximizes probability at given dG.
 
-        Classes are not considered."""
+        Classes are not considered.
+
+        Parameters
+        ----------
+        dG : float
+            The uncertainty used to calculate probabilities
+
+        Returns
+        -------
+        int
+            Index of the model which maximizes probability at given dG.
+        """
         # Note that if there are identical models this returns the one of greatest dg.
         return self.sortedprobs[dG][-1]
diff --git a/diffpy/srmise/pdfdataset.py b/src/diffpy/srmise/pdfdataset.py
similarity index 53%
rename from diffpy/srmise/pdfdataset.py
rename to src/diffpy/srmise/pdfdataset.py
index 2bdd9bf..49fd168 100644
--- a/diffpy/srmise/pdfdataset.py
+++ b/src/diffpy/srmise/pdfdataset.py
@@ -20,7 +20,6 @@
 """
 
-import copy
 import os.path
 import re
 import time
@@ -31,70 +30,106 @@ class PDFComponent(object):
     """Common base class."""
 
+
     def __init__(self, name):
-        """initialize
+        """initialize the object
 
-        name -- object name
+        Parameters
+        ----------
+        name : str
+            object name
         """
         self.name = name
 
-    def close ( self, force = False ):
+    def close(self, force=False):
         """close myself
 
-        force -- if forcibly (no wait)
+        Parameters
+        ----------
+        force : bool
+            Force to close if True, default is False.
         """
         pass
 
+
 class PDFDataSet(PDFComponent):
     """PDFDataSet is a class for experimental PDF data.
 
-    Data members:
-        robs -- list of observed r points
-        Gobs -- list of observed G values
-        drobs -- list of standard deviations of robs
-        dGobs -- list of standard deviations of Gobs
-        stype -- scattering type, 'X' or 'N'
-        qmax -- maximum value of Q in inverse Angstroms. Termination
-                ripples are neglected for qmax=0.
-        qdamp -- specifies width of Gaussian damping factor in pdf_obs due
-                 to imperfect Q resolution
-        qbroad -- quadratic peak broadening factor related to dataset
-        spdiameter -- particle diameter for shape damping function
-                      Note: This attribute was moved to PDFStructure.
-                      It is kept for backward compatibility when reading
-                      PDFgui project files.
-        dscale -- scale factor of this dataset
-        rmin -- same as robs[0]
-        rmax -- same as robs[-1]
-        filename -- set to absolute path after reading from file
-        metadata -- dictionary for other experimental conditions, such as
-                    temperature or doping
-
-    Global member:
-        persistentItems -- list of attributes saved in project file
-        refinableVars -- set (dict) of refinable variable names.
+    Attributes
+    ----------
+    robs : list
+        The list of observed r points.
+    Gobs : list
+        The list of observed G values.
+    drobs : list
+        The list of standard deviations of `robs`.
+    dGobs : list
+        The list of standard deviations of `Gobs`.
+    stype : str
+        The scattering type, either 'X' or 'N'.
+ qmax : float + The maximum value of Q in inverse Angstroms. Termination ripples are neglected for qmax=0. + qdamp : float + Specifies width of Gaussian damping factor in pdf_obs due to imperfect Q resolution. + qbroad : float + The quadratic peak broadening factor related to the dataset. + spdiameter : float + The particle diameter for shape damping function. Note: This attribute was moved to PDFStructure. + It is retained here for backward compatibility when reading PDFgui project files. + dscale : float + The scale factor of this dataset. + rmin : float + The same as `robs[0]`. + rmax : float + The same as `robs[-1]`. + filename : str + Set to the absolute path after reading from a file. + metadata : dict + The dictionary for other experimental conditions, such as temperature or doping. + + Class Members + ------------- + persistentItems : list + The list of attributes saved in the project file. + refinableVars : set + The set (or dict-like) of refinable variable names. """ - persistentItems = [ 'robs', 'Gobs', 'drobs', 'dGobs', 'stype', 'qmax', - 'qdamp', 'qbroad', 'dscale', 'rmin', 'rmax', 'metadata' ] - refinableVars = dict.fromkeys(('qdamp', 'qbroad', 'dscale')) + persistentItems = [ + "robs", + "Gobs", + "drobs", + "dGobs", + "stype", + "qmax", + "qdamp", + "qbroad", + "dscale", + "rmin", + "rmax", + "metadata", + ] + refinableVars = dict.fromkeys(("qdamp", "qbroad", "dscale")) def __init__(self, name): """Initialize. - name -- name of the data set. It must be a unique identifier. + name : str + The name of the data set. It must be a unique identifier. """ PDFComponent.__init__(self, name) self.clear() return def clear(self): - """reset all data members to initial empty values""" + """reset all data members to initial empty values + + The purpose of this method is to set the PDF dataset to initial empty values.""" self.robs = [] self.Gobs = [] self.drobs = [] self.dGobs = [] - self.stype = 'X' + self.stype = "X" # user must specify qmax to get termination ripples self.qmax = 0.0 self.qdamp = 0.001 @@ -108,12 +143,21 @@ def clear(self): return def setvar(self, var, value): - """Assign data member using PdfFit-style variable. - Used by applyParameters(). + """Assign a data member using PdfFit-style variable notation. + This method is typically utilized by the `applyParameters()` function. + + Parameters + ---------- + var : str + String representation of the dataset PdfFit variable. + Possible values include: 'qdamp', 'qbroad', 'dscale'. - var -- string representation of dataset PdfFit variable. - Possible values: qdamp, qbroad, dscale - value -- new value of the variable + value : float + The new value to which the variable `var` will be set. + + Returns + ------- + None """ barevar = var.strip() fvalue = float(value) @@ -128,10 +172,16 @@ def getvar(self, var): """Obtain value corresponding to PdfFit dataset variable. Used by findParameters(). - var -- string representation of dataset PdfFit variable. - Possible values: qdamp, qbroad, dscale + Parameters + ---------- + var : str + string representation of dataset PdfFit variable. 
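# ---- Editor's aside: illustrative usage sketch, not part of the patch. ----
# Round-tripping a refinable variable through the PdfFit-style accessors
# defined above (assumes the post-move import path shown in this diff):
from diffpy.srmise.pdfdataset import PDFDataSet

ds = PDFDataSet("example")
ds.setvar("qdamp", 0.06)
assert ds.getvar("qdamp") == 0.06
# ---- end editor's aside ----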
+ Possible values: qdamp, qbroad, dscale - returns value of var + Returns + ------- + float + value of var """ barevar = var.strip() if barevar in PDFDataSet.refinableVars: @@ -144,39 +194,47 @@ def getvar(self, var): def read(self, filename): """load data from PDFGetX2 or PDFGetN gr file - filename -- file to read from + filename : str + file to read from - returns self + Returns + ------- + self """ try: - self.readStr(open(filename,'rb').read()) - except PDFDataFormatError, err: + self.readStr(open(filename, "rb").read()) + except PDFDataFormatError as err: basename = os.path.basename(filename) - emsg = ("Could not open '%s' due to unsupported file format " + - "or corrupted data. [%s]") % (basename, err) + emsg = ("Could not open '%s' due to unsupported file format " + "or corrupted data. [%s]") % ( + basename, + err, + ) raise SrMiseFileError(emsg) self.filename = os.path.abspath(filename) return self - def readStr(self, datastring): """read experimental PDF data from a string - datastring -- string of raw data + Parameter + --------- + datastring : str + string of raw data - returns self + Returns + self """ self.clear() # useful regex patterns: - rx = { 'f' : r'[-+]?(\d+(\.\d*)?|\d*\.\d+)([eE][-+]?\d+)?' } + rx = {"f": r"[-+]?(\d+(\.\d*)?|\d*\.\d+)([eE][-+]?\d+)?"} # find where does the data start - res = re.search(r'^#+ start data\s*(?:#.*\s+)*', datastring, re.M) + res = re.search(r"^#+ start data\s*(?:#.*\s+)*", datastring, re.M) # start_data is position where the first data line starts if res: start_data = res.end() else: # find line that starts with a floating point number - regexp = r'^\s*%(f)s' % rx + regexp = r"^\s*%(f)s" % rx res = re.search(regexp, datastring, re.M) if res: start_data = res.start() @@ -186,18 +244,18 @@ def readStr(self, datastring): databody = datastring[start_data:].strip() # find where the metadata starts - metadata = '' - res = re.search(r'^#+\ +metadata\b\n', header, re.M) + metadata = "" + res = re.search(r"^#+ +metadata\b\n", header, re.M) if res: - metadata = header[res.end():] - header = header[:res.start()] + metadata = header[res.end() :] + header = header[: res.start()] # parse header # stype - if re.search('(x-?ray|PDFgetX)', header, re.I): - self.stype = 'X' - elif re.search('(neutron|PDFgetN)', header, re.I): - self.stype = 'N' + if re.search("(x-?ray|PDFgetX)", header, re.I): + self.stype = "X" + elif re.search("(neutron|PDFgetN)", header, re.I): + self.stype = "N" # qmax regexp = r"\bqmax *= *(%(f)s)\b" % rx res = re.search(regexp, header, re.I) @@ -227,12 +285,12 @@ def readStr(self, datastring): regexp = r"\b(?:temp|temperature|T)\ *=\ *(%(f)s)\b" % rx res = re.search(regexp, header) if res: - self.metadata['temperature'] = float(res.groups()[0]) + self.metadata["temperature"] = float(res.groups()[0]) # doping regexp = r"\b(?:x|doping)\ *=\ *(%(f)s)\b" % rx res = re.search(regexp, header) if res: - self.metadata['doping'] = float(res.groups()[0]) + self.metadata["doping"] = float(res.groups()[0]) # parsing gerneral metadata if metadata: @@ -241,12 +299,12 @@ def readStr(self, datastring): res = re.search(regexp, metadata, re.M) if res: self.metadata[res.groups()[0]] = float(res.groups()[1]) - metadata = metadata[res.end():] + metadata = metadata[res.end() :] else: break # read actual data - robs, Gobs, drobs, dGobs - inf_or_nan = re.compile('(?i)^[+-]?(NaN|Inf)\\b') + inf_or_nan = re.compile("(?i)^[+-]?(NaN|Inf)\\b") has_drobs = True has_dGobs = True # raise PDFDataFormatError if something goes wrong @@ -257,15 +315,13 @@ def 
readStr(self, datastring): self.robs.append(float(v[0])) self.Gobs.append(float(v[1])) # drobs is valid if all values are defined and positive - has_drobs = (has_drobs and - len(v) > 2 and not inf_or_nan.match(v[2])) + has_drobs = has_drobs and len(v) > 2 and not inf_or_nan.match(v[2]) if has_drobs: v2 = float(v[2]) has_drobs = v2 > 0.0 self.drobs.append(v2) # dGobs is valid if all values are defined and positive - has_dGobs = (has_dGobs and - len(v) > 3 and not inf_or_nan.match(v[3])) + has_dGobs = has_dGobs and len(v) > 3 and not inf_or_nan.match(v[3]) if has_dGobs: v3 = float(v[3]) has_dGobs = v3 > 0.0 @@ -274,24 +330,30 @@ def readStr(self, datastring): self.drobs = len(self.robs) * [0.0] if not has_dGobs: self.dGobs = len(self.robs) * [0.0] - except (ValueError, IndexError), err: + except (ValueError, IndexError) as err: raise PDFDataFormatError(err) self.rmin = self.robs[0] self.rmax = self.robs[-1] - if not has_drobs: self.drobs = len(self.robs) * [0.0] - if not has_dGobs: self.dGobs = len(self.robs) * [0.0] + if not has_drobs: + self.drobs = len(self.robs) * [0.0] + if not has_dGobs: + self.dGobs = len(self.robs) * [0.0] return self - def write(self, filename): """Write experimental PDF data to a file. - filename -- name of file to write to + Parameters + ---------- + filename : str + name of file to write to - No return value. + Returns + ------- + None """ bytes = self.writeStr() - f = open(filename, 'w') + f = open(filename, "w") f.write(bytes) f.close() return @@ -299,42 +361,48 @@ def write(self, filename): def writeStr(self): """String representation of experimental PDF data. - Return data string. + + Returns + ------- + str + The PDF data string. """ lines = [] # write metadata - lines.extend([ - 'History written: ' + time.ctime(), - 'produced by ' + getuser(), - '##### PDFgui' ]) + lines.extend( + [ + "History written: " + time.ctime(), + "produced by " + getuser(), + "##### PDFgui", + ] + ) # stype - if self.stype == 'X': - lines.append('stype=X x-ray scattering') - elif self.stype == 'N': - lines.append('stype=N neutron scattering') + if self.stype == "X": + lines.append("stype=X x-ray scattering") + elif self.stype == "N": + lines.append("stype=N neutron scattering") # qmax if self.qmax == 0: - qmax_line = 'qmax=0 correction not applied' + qmax_line = "qmax=0 correction not applied" else: - qmax_line = 'qmax=%.2f' % self.qmax + qmax_line = "qmax=%.2f" % self.qmax lines.append(qmax_line) # qdamp - lines.append('qdamp=%g' % self.qdamp) + lines.append("qdamp=%g" % self.qdamp) # qbroad - lines.append('qbroad=%g' % self.qbroad) + lines.append("qbroad=%g" % self.qbroad) # dscale - lines.append('dscale=%g' % self.dscale) + lines.append("dscale=%g" % self.dscale) # metadata if len(self.metadata) > 0: - lines.append('# metadata') - for k, v in self.metadata.iteritems(): - lines.append( "%s=%s" % (k,v) ) + lines.append("# metadata") + for k, v in self.metadata.items(): + lines.append("%s=%s" % (k, v)) # write data: - lines.append('##### start data') - lines.append('#L r(A) G(r) d_r d_Gr') + lines.append("##### start data") + lines.append("#L r(A) G(r) d_r d_Gr") for i in range(len(self.robs)): - lines.append('%g %g %g %g' % \ - (self.robs[i], self.Gobs[i], self.drobs[i], self.dGobs[i]) ) + lines.append("%g %g %g %g" % (self.robs[i], self.Gobs[i], self.drobs[i], self.dGobs[i])) # that should be it datastring = "\n".join(lines) + "\n" return datastring @@ -342,8 +410,15 @@ def writeStr(self): def copy(self, other=None): """copy self to other. 
         if other is None, create new instance
 
-        other -- ref to other object
-        returns reference to copied object
+        Parameters
+        ----------
+        other : PDFDataSet instance
+            ref to other object
+
+        Returns
+        -------
+        PDFDataSet instance
+            reference to copied object
         """
         if other is None:
             other = PDFDataSet(self.name)
@@ -351,48 +426,62 @@ def copy(self, other=None):
         other.clear()
         # some attributes can be assigned, e.g., robs, Gobs, drobs, dGobs are
         # constant so they can be shared between copies.
-        assign_attributes = ( 'robs', 'Gobs', 'drobs', 'dGobs', 'stype',
-            'qmax', 'qdamp', 'qbroad', 'dscale',
-            'rmin', 'rmax', 'filename' )
+        assign_attributes = (
+            "robs",
+            "Gobs",
+            "drobs",
+            "dGobs",
+            "stype",
+            "qmax",
+            "qdamp",
+            "qbroad",
+            "dscale",
+            "rmin",
+            "rmax",
+            "filename",
+        )
         # for others we will assign a copy
-        copy_attributes = ( 'metadata', )
+        copy_attributes = ("metadata",)
         for a in assign_attributes:
             setattr(other, a, getattr(self, a))
         import copy
+
         for a in copy_attributes:
             setattr(other, a, copy.deepcopy(getattr(self, a)))
         return other
 
+
 # End of class PDFDataSet
 
 
 class PDFDataFormatError(Exception):
-    """Exception class marking failure to proccess PDF data string.
-    """
+    """Exception class marking failure to process PDF data string."""
+
     pass
 
 
 # simple test code
-if __name__ == '__main__':
+if __name__ == "__main__":
     import sys
+
     filename = sys.argv[1]
     dataset = PDFDataSet("test")
     dataset.read(filename)
-    print "== metadata =="
-    for k, v in dataset.metadata.iteritems():
-        print k, "=", v
-    print "== data members =="
-    for k, v in dataset.__dict__.iteritems():
-        if k in ('metadata', 'robs', 'Gobs', 'drobs', 'dGobs') or k[0] == "_":
+    print("== metadata ==")
+    for k, v in dataset.metadata.items():
+        print(k, "=", v)
+    print("== data members ==")
+    for k, v in dataset.__dict__.items():
+        if k in ("metadata", "robs", "Gobs", "drobs", "dGobs") or k[0] == "_":
             continue
-        print k, "=", v
-    print "== robs Gobs drobs dGobs =="
+        print(k, "=", v)
+    print("== robs Gobs drobs dGobs ==")
     for i in range(len(dataset.robs)):
-        print dataset.robs[i], dataset.Gobs[i], dataset.drobs[i], dataset.dGobs[i]
-    print "== writeStr() =="
-    print dataset.writeStr()
-    print "== datasetcopy.writeStr() =="
+        print(dataset.robs[i], dataset.Gobs[i], dataset.drobs[i], dataset.dGobs[i])
+    print("== writeStr() ==")
+    print(dataset.writeStr())
+    print("== datasetcopy.writeStr() ==")
     datasetcopy = dataset.copy()
-    print datasetcopy.writeStr()
+    print(datasetcopy.writeStr())
 
 # End of file
diff --git a/diffpy/srmise/pdfpeakextraction.py b/src/diffpy/srmise/pdfpeakextraction.py
similarity index 79%
rename from diffpy/srmise/pdfpeakextraction.py
rename to src/diffpy/srmise/pdfpeakextraction.py
index 8f9ecd6..8303843 100644
--- a/diffpy/srmise/pdfpeakextraction.py
+++ b/src/diffpy/srmise/pdfpeakextraction.py
@@ -2,7 +2,8 @@
 #
 # SrMise by Luke Granlund
-# (c) 2014 trustees of the Michigan State University.
+# (c) 2014 trustees of the Michigan State University
+# (c) 2024 trustees of Columbia University in the City of New York
 # All rights reserved.
# # File coded by: Luke Granlund @@ -15,56 +16,56 @@ import os.path import re -import matplotlib.pyplot as plt import numpy as np +from diffpy.srmise import srmiselog from diffpy.srmise.modelcluster import ModelCluster, ModelCovariance # from diffpy.pdfgui.control.pdfdataset import PDFDataSet from diffpy.srmise.pdfdataset import PDFDataSet from diffpy.srmise.peakextraction import PeakExtraction -from diffpy.srmise.srmiseerrors import * +from diffpy.srmise.srmiseerrors import ( + SrMiseDataFormatError, + SrMiseError, + SrMiseQmaxError, + SrMiseStaticOwnerError, + SrMiseUndefinedCovarianceError, +) logger = logging.getLogger("diffpy.srmise") -from diffpy.srmise import srmiselog - class PDFPeakExtraction(PeakExtraction): - """Class for peak extraction of peaks from the PDF. - - Data members in addition to those from PeakExtraction - filename: Source PDF file - nyquist: Whether or not to fit final model at Nyquist sampling rate - qmax: qmax to use during extraction. Use 0 for infinity. - qmax_reportedbypdf: The qmax read from a file containing a PDF - qmax_fromdata: The qmax determined directly from the PDF data - scale: Whether or not to use increased uncertainties when supersampling. - This can speed extraction by reducing the number of very small - peaks found while supersampled, but also means small features - are more likely to be missed. This option puts the chi-square error - of a fit on roughly the same scale before and after resampling. - This option has no effect when Nyquist is False, and defaults - to False when Nyquist is True. - supersample: Make sure data is supersampled by at least this factor - above Nyquist sampling before starting extraction. - - Note that resampling the PDF does not properly propagate the corresponding - uncertainties, which are merely interpolated (and possibly scaled, see above). - Further, all uncertainties are treated as statistically independent, but above - the Nyquist rate the uncertainties of nearby points are highly correlated. - The most trustworthy results are therefore obtained by providing data sampled - at the Nyquist rate with correctly propagated uncertainties. - - In some cases the number of free parameters of the best model found may - exceed the number of independent points in the PDF. This is frequently - true when the PDF is oversampled and/or the reported uncertainties in the - PDF are very small. If this prevents resampling at the Nyquist rate (when - this is desired) the degree of oversampling is reported. + """PDFPeakExtraction extends the PeakExtraction class to specialize in extracting + peaks from PDF (Probability Density Function) data. + + Parameters + ---------- + filename : str + The source PDF file path. + nyquist : bool, optional + Specifies whether to fit the final model at the Nyquist sampling rate. + Defaults to False. + qmax : float, optional + The maximum q value to use during peak extraction. Use 0 to denote infinity. + Defaults to 0. + qmax_reportedbypdf : float + The qmax value read directly from the PDF file. + qmax_fromdata : float + The qmax value determined directly from the PDF data. + scale : bool, optional + Determines whether to use increased uncertainties during supersampling. + This can expedite extraction by minimizing the detection of minuscule peaks, + albeit risking the overlook of minor features. When `Nyquist` is True, + uncertainties are scaled to maintain a similar chi-square error pre- + and post-resampling. Defaults to False if `Nyquist` is True. 
+ supersample : int, optional + Ensures the data is supersampled by at least this factor above the + Nyquist rate before initiating peak extraction. Defaults to 1. """ def __init__(self): - """Initialize.""" + """Initialize the PDFPeakExtraction class.""" newvars = ["qmax", "supersample", "nyquist", "scale"] PeakExtraction.__init__(self, newvars) return @@ -73,7 +74,9 @@ def loadpdf(self, pdf): """Load dataset. Parameters - pdf: A PDFDataSet object, or the name of a file readable by one. + ---------- + pdf: PDFDataSet instance or str + The PDFDataSet instance or a PDF file name. """ self.clear() @@ -88,7 +91,14 @@ def loadpdf(self, pdf): return def setdata(self, x, y, dx=None, dy=None): - """Set data.""" + """Set data. + + Parameters + ---------- + x : array-like + The x-coordinates of the data. + y : array-like + The y-coordinates of the data.""" PeakExtraction.setdata(self, x, y, dx, dy) try: self.qmax_fromdata = find_qmax(self.x, self.y)[0] @@ -96,7 +106,9 @@ def setdata(self, x, y, dx=None, dy=None): logger.info("Could not determine qmax from the data.") def clear(self): - """Clear all members.""" + """Clear all members. + + The purpose of the method is to ensure the object is in a clean state.""" # TODO: Clear additional members self.filename = None self.nyquist = None @@ -110,22 +122,48 @@ def clear(self): def setvars(self, quiet=False, **kwds): """Set one or more extraction variables. - Variables - quiet: [False] Log changes quietly. - - Keywords - cres: The clustering resolution, must be > 0. - effective_dy: The uncertainties actually used during extraction - dg: Alias for effective_dy - pf: Sequence of PeakFunctionBase subclass instances. - baseline: Baseline instance or BaselineFunction instance (use built-in estimation) - error_method: ErrorEvaluator subclass instance used to compare models (default AIC) - initial_peaks: Peaks instance. These peaks are present at the start of extraction. - rng: Sequence specifying the least and greatest x-values over which to extract peaks. - qmax: The qmax value for the pdf. Using "automatic" will estimate it from data. - nyquist: Use nyquist sampling or not (boolean) - supersample: Degree of supersampling above Nyquist rate to use. - scale: Scale uncertainties on recursion when nyquist is True (boolean).""" + Parameters + ---------- + quiet : bool, optional + Log changes quietly. Default is False. + + **kwds : keyword arguments + Additional variables to set. Possible keywords include: + + - cres : float + The clustering resolution, must be greater than or equal to 0. + + - effective_dy : float + The uncertainties actually used during extraction. Aliased as 'dg'. + + - pf : list of PeakFunctionBase subclasses instances + Sequence of peak function base subclass instances. + + - baseline : Baseline or BaselineFunction instance + Baseline instance or BaselineFunction instance for built-in estimation. + + - error_method : ErrorEvaluator subclass instance + Error evaluator subclass instance used to compare models. Default is AIC. + + - initial_peaks : Peaks instance + Peaks instance representing the peaks present at the start of extraction. + + - rng : tuple of (float, float) + Specifies the least and greatest x-values over which to extract peaks. + + - qmax : float or "automatic" + The qmax value for the probability density function (pdf). + If set to "automatic", it will be estimated from the data. + + - nyquist : bool + Whether to use nyquist sampling or not. + + - supersample : int + Degree of supersampling above the Nyquist rate to use. 
+ + - scale : bool + Scale uncertainties on recursion when nyquist is True. + """ # Treat "dg" as alias for "effective_dy" if "dg" in kwds: if "effective_dy" not in kwds: @@ -147,7 +185,12 @@ def setvars(self, quiet=False, **kwds): PeakExtraction.setvars(self, quiet, **kwds) def defaultvars(self, *args): - """Set default values.""" + """Set default values. + + Parameters + ---------- + *args : argparse.Namespace + Arguments passed to PeakExtraction.setdata().""" nargs = list(args) # qmax preference: reported, then fromdata, then 0. @@ -193,7 +236,7 @@ def defaultvars(self, *args): nargs.remove("cres") if self.pf is None or "pf" in args: - from diffpy.srmise.peaks import GaussianOverR + from diffpy.srmise.peaks.gaussianoverr import GaussianOverR self.pf = [GaussianOverR(0.7)] if "pf" in args: @@ -219,10 +262,21 @@ def resampledata(self, dr, **kwds): new grid. Parameters - dr: The sampling interval + ---------- + dr : float + The sampling interval for resampling the data. + + **kwds : dict, optional + Additional keyword arguments. - Keywords - eps: [10^-6] Suppress information lost warning when dr-dr_nyquist < eps""" + - eps : float, default=1e-6 + Suppresses the information lost warning when the difference between `dr` + and the Nyquist interval `dr_nyquist` is less than `eps`. + + Returns + ------- + tuple of ndarray + A tuple containing the resampled (x, y, error in x, effective error in y).""" self.defaultvars() # Find correct range if necessary. eps = kwds.get("eps", 10**-6) @@ -234,8 +288,7 @@ def resampledata(self, dr, **kwds): # Not a robust epsilon test, but all physical Nyquist rates in same oom. if dr - dr_nyquist > eps: logger.warning( - "Resampling at %s, below Nyquist rate of %s. Information will be lost!" - % (dr, dr_nyquist) + "Resampling at %s, below Nyquist rate of %s. Information will be lost!" % (dr, dr_nyquist) ) r = np.arange(max(self.x[0], self.rng[0]), min(self.x[-1], self.rng[1]), dr) @@ -258,7 +311,14 @@ def errorscale(self, dr): is enabled, and scale is True. Parameters - dr: The sampling interval""" + ---------- + dr: float + The sampling interval + + Returns + ------- + float + The uncertainties scaled.""" if self.qmax > 0 and self.nyquist and self.scale: dr_nyquist = np.pi / self.qmax return np.max([np.sqrt(dr_nyquist / dr), 1.0]) @@ -266,7 +326,20 @@ def errorscale(self, dr): return 1.0 def extract(self, **kwds): - """Extract peaks from the PDF. Returns ModelCovariance instance summarizing results.""" + """Extract peaks from the PDF. Returns ModelCovariance instance summarizing results. + + Parameters + ---------- + **kwds : dict + Additional keyword arguments that might influence the extraction process. + These could include parameters like `qmax`, `supersample`, `nyquist`, etc., which + affect resampling and model refinement strategies. + + Returns + ------- + ModelCovariance + An instance of ModelCovariance summarizing the covariance of the extracted model parameters. + """ # TODO: The sanest way forward is to create a PeakExtraction object that does # the calculations for resampled data. All the relevant extraction variables # can be carefully controlled this way as well. Furthermore, it continues to @@ -294,7 +367,7 @@ def extract(self, **kwds): if dr_raw > dr_nyquist: # Technically I should yell for dr_raw >= dr_nyquist, since information # loss may occur at equality. 
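# ---- Editor's aside: illustrative sketch, not part of the patch. ----
# The Nyquist bookkeeping used by resampledata()/errorscale() above: a PDF
# with maximum momentum transfer qmax carries no extra information on grids
# finer than dr = pi/qmax, and when `scale` is enabled the uncertainty is
# inflated by sqrt(dr_nyquist/dr) on supersampled grids. Fabricated numbers:
import numpy as np

qmax = 25.0                                      # hypothetical, inverse angstroms
dr_nyquist = np.pi / qmax                        # coarsest lossless spacing
dr = dr_nyquist / 4.0                            # 4x supersampled grid
scale = np.max([np.sqrt(dr_nyquist / dr), 1.0])  # = 2.0 for this example
# ---- end editor's aside ----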
- logger.warn( + logger.warning( "The input PDF appears to be missing information: The " "sampling interval of the input PDF (%s) is larger than " "the Nyquist interval (%s) defined by qmax=%s. This information " @@ -358,7 +431,7 @@ def extract(self, **kwds): logger.info("\n".join(msg), ext) - from diffpy.srmise.peaks import TerminationRipples + from diffpy.srmise.peaks.terminationripples import TerminationRipples owners = list(set([p.owner() for p in ext.model])) tfuncs = {} @@ -440,9 +513,7 @@ def extract(self, **kwds): break else: - ext = ModelCluster( - ext.model, bl, r1, y1, y_error1, None, self.error_method, self.pf - ) + ext = ModelCluster(ext.model, bl, r1, y1, y_error1, None, self.error_method, self.pf) ext.prune() logger.info("Model after resampling and termination ripples:\n%s", ext) @@ -456,10 +527,8 @@ def extract(self, **kwds): try: logger.info(str(cov)) # logger.info("Correlations > .8:\n%s", "\n".join(str(c) for c in cov.correlationwarning(.8))) - except SrMiseUndefinedCovarianceError as e: - logger.warn( - "Covariance not defined for final model. Fit may not have converged." - ) + except SrMiseUndefinedCovarianceError: + logger.warning("Covariance not defined for final model. Fit may not have converged.") logger.info(str(ext)) # Update calculated instance variables @@ -471,7 +540,19 @@ def extract(self, **kwds): return cov def fit(self, **kwds): - """Fit peaks in the PDF. Returns ModelCovariance instance summarizing results.""" + """Fit peaks in the PDF. Returns ModelCovariance instance summarizing results. + + Parameters + ---------- + **kwds : dict + Keyword arguments passed to ModelCovariance instance. Different keywords could have + different strategies to fit. See ModelCovariance class for more information. + + Returns + ------- + ModelCovariance instance + The fitted ModelCovariance instance. + """ self.clearcalc() @@ -488,7 +569,7 @@ def fit(self, **kwds): if dr_raw > dr_nyquist: # Technically I should yell for dr_raw >= dr_nyquist, since information # loss may occur at equality. - logger.warn( + logger.warning( "The input PDF appears to be missing information: The " "sampling interval of the input PDF (%s) is larger than " "the Nyquist interval (%s) defined by qmax=%s. This information " @@ -531,10 +612,8 @@ def fit(self, **kwds): logger.info(str(ext)) try: logger.info(str(cov)) - except SrMiseUndefinedCovarianceError as e: - logger.warn( - "Covariance not defined for final model. Fit may not have converged." - ) + except SrMiseUndefinedCovarianceError: + logger.warning("Covariance not defined for final model. Fit may not have converged.") # Update calculated instance variables self.extraction_type = "fit" @@ -557,7 +636,17 @@ def writemetadata(self): return datastring def readmetadata(self, metastr): - """Read metadata from string.""" + """Read metadata from string. + + Parameters + ---------- + metastr : str + Metadata string to read. + + Returns + ------- + None + """ # filename res = re.search(r"^filename=(.*)$", metastr, re.M) @@ -619,7 +708,17 @@ def writepwa(self, filename, comments="n/a"): """Write string summarizing extracted peaks to file. Parameters - filename: the name of the file to write""" + ---------- + filename : str + The name of the file to write + + comments : str + The comments to write + + Returns + ------- + None + """ bytes = self.writepwastr(comments) f = open(filename, "w") f.write(bytes) @@ -633,7 +732,13 @@ def writepwastr(self, comments): this file. Parameters - comments: String added to header containing notes about the output. 
+ ---------- + comments : str + The string added to header containing notes about the output. + + Returns + ------- + None """ if self.extracted is None: @@ -703,13 +808,8 @@ def writepwastr(self, comments): lines.append("## Model Quality") # Quality of fit - lines.append( - "# Quality reported by ModelEvaluator: %s" % self.extracted.quality().stat - ) - lines.append( - "# Free parameters in extracted peaks: %s" - % self.extracted.model.npars(count_fixed=False) - ) + lines.append("# Quality reported by ModelEvaluator: %s" % self.extracted.quality().stat) + lines.append("# Free parameters in extracted peaks: %s" % self.extracted.model.npars(count_fixed=False)) if self.baseline is not None: fblpars = self.baseline.npars(count_fixed=False) else: @@ -820,25 +920,28 @@ def resample(orig_r, orig_y, new_r): """Resample sequence with Whittaker-Shannon interpolation formula. Parameters - orig_r: (Numpy array) The r grid of the original sample. - orig_y: (Numpy array) The data to resample. - new_r: (Numpy array) The resampled r grid. - - Returns sequence of same type as new_r with the resampled values. + ---------- + orig_r : array-like + The r grid of the original sample. + orig_y : array-like + The data to resample. + new_r : array-like + The resampled r grid. + + Returns + ------- + new_y : array-like + The sequence of same type as new_r with the resampled values. """ n = len(orig_r) dr = (orig_r[-1] - orig_r[0]) / (n - 1) if new_r[0] < orig_r[0]: - logger.warning( - "Resampling outside original grid: %s (requested) < %s (original)" - % (new_r[0], orig_r[0]) - ) + logger.warning("Resampling outside original grid: %s (requested) < %s (original)" % (new_r[0], orig_r[0])) if new_r[-1] > orig_r[-1]: logger.warning( - "Resampling outside original grid: %s (requested) > %s (original)" - % (new_r[-1], orig_r[-1]) + "Resampling outside original grid: %s (requested) > %s (original)" % (new_r[-1], orig_r[-1]) ) new_y = new_r * 0.0 @@ -853,8 +956,19 @@ def find_qmax(r, y, showgraphs=False): """Determine approximate qmax from PDF. Parameters - r: The r values of the PDF. - y: The corresponding y values of the PDF.""" + ---------- + r : array-like + The r values of the PDF. + y : array-like + The corresponding y values of the PDF. + showgraphs : bool + If True, the graphs are shown. + + Returns + ------- + tuple + The qmax of the PDF and its corresponding uncertainties. + """ if len(r) != len(y): emsg = "Argument arrays must have the same length." raise ValueError(emsg) @@ -913,7 +1027,7 @@ def find_qmax(r, y, showgraphs=False): plt.show() plt.ioff() - raw_input() + input() return (qmax, dq) @@ -925,10 +1039,15 @@ def stdratio(data): deviation. Parameters - data: Sequence of data values - - Returns an array of length floor(len(data)/2)-1. The ith element is - equivalent to std(data[:i+2])/std(data[i+2:2i+4]).""" + ---------- + data : array-like + The sequence of data values + + Returns + ------- + array-like + an array of length floor(len(data)/2)-1. 
The ith element is + equivalent to std(data[:i+2])/std(data[i+2:2i+4]).""" limit = int(np.floor(len(data) / 2)) std_left = np.zeros(limit) diff --git a/diffpy/srmise/peakextraction.py b/src/diffpy/srmise/peakextraction.py similarity index 61% rename from diffpy/srmise/peakextraction.py rename to src/diffpy/srmise/peakextraction.py index 73da8a2..d323ece 100644 --- a/diffpy/srmise/peakextraction.py +++ b/src/diffpy/srmise/peakextraction.py @@ -2,7 +2,8 @@ ############################################################################## # # SrMise by Luke Granlund -# (c) 2014 trustees of the Michigan State University. +# (c) 2014 trustees of the Michigan State University +# (c) 2024 trustees of Columia University in the City of New York # All rights reserved. # # File coded by: Luke Granlund @@ -19,56 +20,81 @@ import matplotlib.pyplot as plt import numpy as np -from diffpy.srmise.baselines import Baseline +from diffpy.srmise import srmiselog +from diffpy.srmise.baselines.base import Baseline from diffpy.srmise.dataclusters import DataClusters from diffpy.srmise.modelcluster import ModelCluster, ModelCovariance -from diffpy.srmise.modelparts import ModelPart, ModelParts -from diffpy.srmise.peaks import Peak, Peaks -from diffpy.srmise.srmiseerrors import * +from diffpy.srmise.peaks.base import Peak, Peaks +from diffpy.srmise.srmiseerrors import SrMiseDataFormatError, SrMiseEstimationError, SrMiseFileError logger = logging.getLogger("diffpy.srmise") -from diffpy.srmise import srmiselog - class PeakExtraction(object): """Class for peak extraction. - Data members - x: x coordinates of the data - y: y coordinates of the data - dx: uncertainties in the x coordinates (not used) - dy: uncertainties in the y coordinates - effective_dy: uncertainties in the y coordinates actually used during extraction - rng: [xmin, xmax] Range of x coordinates over which to extract peaks - pf: Sequence of peak functions that can be extracted - initial_peaks: Peaks present at start of extraction - baseline: Baseline for data - cres: Resolution of clustering - error_method: ErrorEvaluator class used to compare models + Parameters + ---------- + x : array-like + The x coordinates of the data + y : array-like + The y coordinates of the data + dx : array-like + The uncertainties in the x coordinates (not used) + dy : array-like + The uncertainties in the y coordinates + effective_dy : array-like + The uncertainties in the y coordinates actually used during extraction + rng : list + The [xmin, xmax] Range of x coordinates over which to extract peaks + pf : array-like + The sequence of peak functions that can be extracted + initial_peaks: Peaks object + The peaks present at start of extraction + baseline : Baseline object + The baseline for data + cres : float + The resolution of clustering + error_method : ErrorEvaluator class + The Evaluation class used to compare models Calculated members - extracted: ModelCluster after extraction - extraction_type: Type of extraction + ------------------ + extracted : ModelCluster object + The ModelCluster object after extraction + extraction_type : Type of extraction """ def __init__(self, newvars=[]): """Initialize PeakExtraction object. 
Parameters - newvars: Sequence of strings that represent additional extraction parameters.""" + newvars : array-like + Sequence of strings that represent additional extraction parameters.""" self.clear() - self.extractvars = dict.fromkeys(('effective_dy', 'rng', 'pf', 'initial_peaks', 'baseline', 'cres', 'error_method')) + self.extractvars = dict.fromkeys( + ( + "effective_dy", + "rng", + "pf", + "initial_peaks", + "baseline", + "cres", + "error_method", + ) + ) for k in newvars: if k not in self.extractvars: self.extractvars[k] = None else: - emsg = "Extraction variable %s conflicts with existing variable" %k + emsg = "Extraction variable %s conflicts with existing variable" % k raise ValueError(emsg) return def clear(self): - """Clear all members.""" + """Clear all members. + + The purpose of the method is to ensure the object is in initialized state.""" self.x = None self.y = None self.dx = None @@ -104,24 +130,32 @@ def setdata(self, x, y, dx=None, dy=None): if len(self.x) != len(self.dx) or len(self.x) != len(self.dy): emsg = "Sequences dx and dy (if present) must have the same length as x" raise ValueError(emsg) - #self.defaultvars() + # self.defaultvars() return def setvars(self, quiet=False, **kwds): """Set one or more extraction variables. - Variables - quiet: [False] Log changes quietly. - - Keywords - cres: The clustering resolution, must be > 0. - effective_dy: The uncertainties actually used during extraction - pf: Sequence of PeakFunctionBase subclass instances. - baseline: Baseline instance or BaselineFunction instance (use built-in estimation) - error_method: ErrorEvaluator subclass instance used to compare models (default AIC) - initial_peaks: Peaks instance. These peaks are present at the start of extraction. - rng: Sequence specifying the least and greatest x-values over which to extract peaks.""" - for k, v in kwds.iteritems(): + Parameters + ---------- + quiet : bool + The log changes quietly. Default is False. + cres : float + The clustering resolution, must be > 0. + effective_dy : array-like + The uncertainties actually used during extraction + pf : list + The sequence of PeakFunctionBase subclass instances. + baseline : Baseline instance or BaselineFunction instance + The Baseline instance or BaselineFunction instance that use built-in estimation + error_method : ErrorEvaluator subclass instance + The ErrorEvaluator subclass instance used to compare models. Default is AIC. + initial_peaks : Peaks instance + These peaks are present at the start of extraction. + rng : array-like + The sequence specifying the least and greatest x-values over which to extract peaks. + """ + for k, v in kwds.items(): if k in self.extractvars: if quiet: logger.debug("Setting variable %s=%s", k, v) @@ -137,46 +171,49 @@ def defaultvars(self, *args): """Set unset(=None) extraction variables to default values. Certain variables may be partially set for convenience, and are transformed - appropriately. See 'Default values' below. + appropriately. See 'Default values assigned' below. Parameters - Any number of strings corresponding to extraction variables. These - variables are reset to their default values even if another value - already exists. - - Default values: - cres -> 4 times the average spacing between elements in x - effective_dy -> The data dy if all elements > 0, otherwise 5% of max(y)-min(y). - If effective_dy is a positive scalar, then an array of that - value of appropriate length. - pf -> [GaussianOverR(maxwidth=x[-1]-x[0])] - baseline -> Flat baseline located at y=0. 
- error_method -> AIC (Aikake Information Criterion) - initial_peaks -> No initial peaks - rng -> [x[0], x[-1]]. Partially set ranges like [None, 100.] replace None with - the appropriate limit in the data. + ---------- + *args : str + The variable argument list where each string corresponds to an extraction + variable name. + + Default values assigned: + - `cres` : 4 times the average spacing between elements in `x`. + - `effective_dy` : If all elements in `y` are positive, it's set to the data `dy`; + otherwise, it's 5% of the range (`max(y)` - `min(y)`). If `effective_dy` + is a positive scalar, an array of that value with a length matching `y` is used. + - `pf` : A list containing a single Gaussian overlap function with the maximum width + spanning the entire `x` range (`x[-1] - x[0]`). + - `baseline` : A flat baseline at `y=0`, indicating no background signal. + - `error_method` : Uses the AIC (Akaike Information Criterion) for evaluating model fits. + - `initial_peaks` : Assumes no initial peak guesses, implying peaks will be detected from scratch. + - `rng` : The default range is set to span the entire `x` dataset, i.e., `[x[0], x[-1]]`. + If a range is partially defined, e.g., `[None, 100.]`, the `None` value is replaced + with the respective boundary of the `x` data. Note that the default values of very important parameters like the uncertainty and clustering resolution are crude guesses at best. """ - if self.cres is None or 'cres' in args: - self.cres = 4*(self.x[-1] - self.x[0])/len(self.x) + if self.cres is None or "cres" in args: + self.cres = 4 * (self.x[-1] - self.x[0]) / len(self.x) - if self.effective_dy is None or 'effective_dy' in args: + if self.effective_dy is None or "effective_dy" in args: if np.all(self.dy > 0): # That is, all points positive uncertainty. self.effective_dy = self.dy else: # A terribly crude guess - self.effective_dy = .05*(np.max(self.y)-np.min(self.y))*np.ones(len(self.x)) + self.effective_dy = 0.05 * (np.max(self.y) - np.min(self.y)) * np.ones(len(self.x)) elif np.isscalar(self.effective_dy) and self.effective_dy > 0: - self.effective_dy = self.effective_dy*np.ones(len(self.x)) + self.effective_dy = self.effective_dy * np.ones(len(self.x)) if self.pf is None or "pf" in args: - from diffpy.srmise.peaks import GaussianOverR + from diffpy.srmise.peaks.gaussianoverr import GaussianOverR # TODO: Make a more useful default. 
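# --- Editor's sketch (not part of this diff): the default heuristics listed
# above amount to the following, assuming x, y, dy are numpy arrays. The
# helper name is hypothetical; the expressions mirror defaultvars().
import numpy as np

def default_extraction_vars(x, y, dy):
    # cres defaults to 4x the average spacing between points in x.
    cres = 4 * (x[-1] - x[0]) / len(x)
    if np.all(dy > 0):
        effective_dy = dy  # trust the reported uncertainties
    else:
        # crude fallback: 5% of the data's total range at every point
        effective_dy = 0.05 * (np.max(y) - np.min(y)) * np.ones(len(x))
    return cres, effective_dy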
- self.pf = [GaussianOverR(self.x[-1]-self.x[0])] + self.pf = [GaussianOverR(self.x[-1] - self.x[0])] if self.rng is None or "rng" in args: self.rng = [self.x[0], self.x[-1]] @@ -192,44 +229,51 @@ def defaultvars(self, *args): s = self.getrangeslice() epars = self.baseline.estimate_parameters(self.x[s], self.y[s]) self.baseline = self.baseline.actualize(epars, "internal") - logger.info("Estimating baseline: %s" %self.baseline) + logger.info("Estimating baseline: %s" % self.baseline) except (NotImplementedError, SrMiseEstimationError): logger.error("Could not estimate baseline from provided BaselineFunction, trying default values.") self.baseline = None if self.baseline is None or "baseline" in args: - from diffpy.srmise.baselines import Polynomial - bl = Polynomial(degree = -1) + from diffpy.srmise.baselines.polynomial import Polynomial + + bl = Polynomial(degree=-1) self.baseline = bl.actualize(np.array([]), "internal") if self.error_method is None or "error_method" in args: - from diffpy.srmise.modelevaluators import AIC + from diffpy.srmise.modelevaluators.aic import AIC + self.error_method = AIC if self.initial_peaks is None or "initial_peaks" in args: self.initial_peaks = Peaks() - def __str__(self): """Return string summary of PeakExtraction.""" out = [] for k in self.extractvars: - out.append("%s: %s" %(k, getattr(self, k))) + out.append("%s: %s" % (k, getattr(self, k))) if self.extracted is not None: - out.append("Extraction type: %s" %self.extraction_type) + out.append("Extraction type: %s" % self.extraction_type) out.append("--- Extracted ---") out.append(str(self.extracted)) else: out.append("No extracted peaks exist.") - return '\n'.join(out)+'\n' + return "\n".join(out) + "\n" def plot(self, **kwds): """Convenience function to plot data and extracted peaks with matplotlib. Uses initial peaks instead if no peaks have been extracted. - Takes same keywords as ModelCluster.plottable()""" + Takes same keywords as ModelCluster.plottable() + + Parameters + ---------- + **kwds :args + The keyword arguments to pass to matplotlib. + """ plt.clf() if self.extracted is not None: plt.plot(*self.extracted.plottable(kwds)) @@ -242,24 +286,39 @@ def plot(self, **kwds): x = self.x[rangeslice] y = self.y[rangeslice] dy = self.dy[rangeslice] - mcluster = ModelCluster(self.initial_peaks, self.baseline, x, y, dy, None, self.error_method, self.pf) + mcluster = ModelCluster( + self.initial_peaks, + self.baseline, + x, + y, + dy, + None, + self.error_method, + self.pf, + ) plt.plot(*mcluster.plottable(kwds)) - def read(self, filename): """load PeakExtraction object from file - filename -- file from which to read + Parameters + ---------- + filename : str + The file from which to read - returns self + Returns + ------- + self """ try: - self.readstr(open(filename,'rb').read()) - except SrMiseDataFormatError, err: + self.readstr(open(filename, "rb").read()) + except SrMiseDataFormatError as err: logger.exception("") basename = os.path.basename(filename) - emsg = ("Could not open '%s' due to unsupported file format " + - "or corrupted data. [%s]") % (basename, err) + emsg = ("Could not open '%s' due to unsupported file format " + "or corrupted data. [%s]") % ( + basename, + err, + ) raise SrMiseFileError(emsg) return self @@ -267,7 +326,10 @@ def readstr(self, datastring): """Initialize members from string. 
Parameters - datastring: The raw data to read""" + ---------- + datastring : array-like + The raw data to read + """ from diffpy.srmise.basefunction import BaseFunction self.clear() @@ -288,126 +350,126 @@ def readstr(self, datastring): safebf = [] # find where the results section starts - res = re.search(r'^#+ Results\s*(?:#.*\s+)*', datastring, re.M) + res = re.search(r"^#+ Results\s*(?:#.*\s+)*", datastring, re.M) if res: - results = datastring[res.end():].strip() - header = datastring[:res.start()] + results = datastring[res.end() :].strip() + header = datastring[: res.start()] # find data section, and what information it contains - res = re.search(r'^#+ start data\s*(?:#.*\s+)*', header, re.M) + res = re.search(r"^#+ start data\s*(?:#.*\s+)*", header, re.M) if res: - start_data = header[res.end():].strip() - start_data_info = header[res.start():res.end()] - header = header[:res.start()] - res = re.search(r'^(#+L.*)$', start_data_info, re.M) + start_data = header[res.end() :].strip() + start_data_info = header[res.start() : res.end()] + header = header[: res.start()] + res = re.search(r"^(#+L.*)$", start_data_info, re.M) if res: - start_data_info = start_data_info[res.start():res.end()].strip() + start_data_info = start_data_info[res.start() : res.end()].strip() hasx = False hasy = False hasdx = False hasdy = False hasedy = False - res = re.search(r'\bx\b', start_data_info) + res = re.search(r"\bx\b", start_data_info) if res: hasx = True - res = re.search(r'\by\b', start_data_info) + res = re.search(r"\by\b", start_data_info) if res: hasy = True - res = re.search(r'\bdx\b', start_data_info) + res = re.search(r"\bdx\b", start_data_info) if res: hasdx = True - res = re.search(r'\bdy\b', start_data_info) + res = re.search(r"\bdy\b", start_data_info) if res: hasdy = True - res = re.search(r'\edy\b', start_data_info) + res = re.search(r"\edy\b", start_data_info) if res: hasedy = True - res = re.search(r'^#+ Metadata\s*(?:#.*\s+)*', header, re.M) + res = re.search(r"^#+ Metadata\s*(?:#.*\s+)*", header, re.M) if res: - metadata = header[res.end():].strip() - header = header[:res.start()] + metadata = header[res.end() :].strip() + header = header[: res.start()] - res = re.search(r'^#+ SrMiseMetadata\s*(?:#.*\s+)*', header, re.M) + res = re.search(r"^#+ SrMiseMetadata\s*(?:#.*\s+)*", header, re.M) if res: - srmisemetadata = header[res.end():].strip() - header = header[:res.start()] + srmisemetadata = header[res.end() :].strip() + header = header[: res.start()] - res = re.search(r'^#+ InitialPeaks.*$', header, re.M) + res = re.search(r"^#+ InitialPeaks.*$", header, re.M) if res: - initial_peaks = header[res.end():].strip() - header = header[:res.start()] + initial_peaks = header[res.end() :].strip() + header = header[: res.start()] - res = re.search(r'^#+ BaselineObject\s*(?:#.*\s+)*', header, re.M) + res = re.search(r"^#+ BaselineObject\s*(?:#.*\s+)*", header, re.M) if res: - baselineobject = header[res.end():].strip() - header = header[:res.start()] + baselineobject = header[res.end() :].strip() + header = header[: res.start()] - res = re.search(r'^#+ PeakFunctions.*$', header, re.M) + res = re.search(r"^#+ PeakFunctions.*$", header, re.M) if res: - peakfunctions = header[res.end():].strip() - header = header[:res.start()] + peakfunctions = header[res.end() :].strip() + header = header[: res.start()] - res = re.search(r'^#+ BaselineFunctions.*$', header, re.M) + res = re.search(r"^#+ BaselineFunctions.*$", header, re.M) if res: - baselinefunctions = header[res.end():].strip() - header = 
header[:res.start()] + baselinefunctions = header[res.end() :].strip() + header = header[: res.start()] - ### Instantiating baseline functions - res = re.split(r'(?m)^#+ BaselineFunction \d+\s*(?:#.*\s+)*', baselinefunctions) + # Instantiating baseline functions + res = re.split(r"(?m)^#+ BaselineFunction \d+\s*(?:#.*\s+)*", baselinefunctions) for s in res[1:]: safebf.append(BaseFunction.factory(s, safebf)) - ### Instantiating peak functions - res = re.split(r'(?m)^#+ PeakFunction \d+\s*(?:#.*\s+)*', peakfunctions) + # Instantiating peak functions + res = re.split(r"(?m)^#+ PeakFunction \d+\s*(?:#.*\s+)*", peakfunctions) for s in res[1:]: safepf.append(BaseFunction.factory(s, safepf)) - ### Instantiating Baseline object - if re.match(r'^None$', baselineobject): + # Instantiating Baseline object + if re.match(r"^None$", baselineobject): self.baseline = None - elif re.match(r'^\d+$', baselineobject): + elif re.match(r"^\d+$", baselineobject): self.baseline = safebf[int(baselineobject)] else: self.baseline = Baseline.factory(baselineobject, safebf) - ### Instantiating initial peaks - if re.match(r'^None$', initial_peaks): + # Instantiating initial peaks + if re.match(r"^None$", initial_peaks): self.initial_peaks = None else: self.initial_peaks = Peaks() - res = re.split(r'(?m)^#+ InitialPeak\s*(?:#.*\s+)*', initial_peaks) + res = re.split(r"(?m)^#+ InitialPeak\s*(?:#.*\s+)*", initial_peaks) for s in res[1:]: self.initial_peaks.append(Peak.factory(s, safepf)) - ### Instantiating srmise metatdata + # Instantiating srmise metatdata # pf - res = re.search(r'^pf=(.*)$', srmisemetadata, re.M) + res = re.search(r"^pf=(.*)$", srmisemetadata, re.M) self.pf = eval(res.groups()[0].strip()) if self.pf is not None: self.pf = [safepf[i] for i in self.pf] # cres - rx = { 'f' : r'[-+]?(\d+(\.\d*)?|\d*\.\d+)([eE][-+]?\d+)?' 
} + rx = {"f": r"[-+]?(\d+(\.\d*)?|\d*\.\d+)([eE][-+]?\d+)?"} regexp = r"\bcres *= *(%(f)s)\b" % rx res = re.search(regexp, srmisemetadata, re.I) self.cres = float(res.groups()[0]) # error_method - res = re.search(r'^ModelEvaluator=(.*)$', srmisemetadata, re.M) + res = re.search(r"^ModelEvaluator=(.*)$", srmisemetadata, re.M) __import__("diffpy.srmise.modelevaluators") module = sys.modules["diffpy.srmise.modelevaluators"] self.error_method = getattr(module, res.groups()[0].strip()) # range - res = re.search(r'^Range=(.*)$', srmisemetadata, re.M) + res = re.search(r"^Range=(.*)$", srmisemetadata, re.M) self.rng = eval(res.groups()[0].strip()) - ### Instantiating other metadata + # Instantiating other metadata self.readmetadata(metadata) - ### Instantiating start data + # Instantiating start data # read actual data - x, y, dx, dy, plus effective_dy arrays = [] if hasx: @@ -438,12 +500,12 @@ def readstr(self, datastring): # raise SrMiseDataFormatError if something goes wrong try: for line in start_data.split("\n"): - l = line.split() - if len(arrays) != len(l): - emsg = ("Number of value fields does not match that given by '%s'" %start_data_info) + split_line = line.split() + if len(arrays) != len(split_line): + emsg = "Number of value fields does not match that given by '%s'" % start_data_info for a, v in zip(arrays, line.split()): a.append(float(v)) - except (ValueError, IndexError), err: + except (ValueError, IndexError) as err: raise SrMiseDataFormatError(str(err)) if hasx: self.x = np.array(self.x) @@ -456,15 +518,14 @@ def readstr(self, datastring): if hasedy: self.effective_dy = np.array(self.effective_dy) - - ### Instantiating results - res = re.search(r'^#+ ModelCluster\s*(?:#.*\s+)*', results, re.M) + # Instantiating results + res = re.search(r"^#+ ModelCluster\s*(?:#.*\s+)*", results, re.M) if res: - mc = results[res.end():].strip() - results = results[:res.start()] + mc = results[res.end() :].strip() + results = results[: res.start()] # extraction type - res = re.search(r'^extraction_type=(.*)$', results, re.M) + res = re.search(r"^extraction_type=(.*)$", results, re.M) if res: self.extraction_type = eval(res.groups()[0].strip()) else: @@ -472,7 +533,7 @@ def readstr(self, datastring): raise SrMiseDataFormatError(emsg) # extracted - if re.match(r'^None$', mc): + if re.match(r"^None$", mc): self.extracted = None else: self.extracted = ModelCluster.factory(mc, pfbaselist=safepf, blfbaselist=safebf) @@ -481,16 +542,23 @@ def write(self, filename): """Write string representation of PeakExtraction instance to file. Parameters - filename: the name of the file to write""" + ---------- + filename : str + The name of the file to write + """ bytes = self.writestr() - f = open(filename, 'w') + f = open(filename, "w") f.write(bytes) f.close() return - def writestr(self): - """Return string representation of PeakExtraction object.""" + """Return string representation of PeakExtraction object. 
+ + Returns + ------- + The str representation of PeakExtraction object + """ import time from getpass import getuser @@ -500,11 +568,14 @@ def writestr(self): lines = [] # Header - lines.extend([ - 'History written: ' + time.ctime(), - 'produced by ' + getuser(), - 'diffpy.srmise version %s' %__version__, - '##### PDF Peak Extraction' ]) + lines.extend( + [ + "History written: " + time.ctime(), + "produced by " + getuser(), + "diffpy.srmise version %s" % __version__, + "##### PDF Peak Extraction", + ] + ) # Generate list of PeakFunctions and BaselineFunctions # so I can refer to them by index when necessary. @@ -517,7 +588,7 @@ def writestr(self): if self.baseline is not None: if isinstance(self.baseline, BaseFunction): allbf.append(self.baseline) - else: # should be a ModelPart + else: # should be a ModelPart allbf.append(self.baseline.owner()) if self.extracted is not None: allpf.extend(self.extracted.peak_funcs) @@ -532,13 +603,13 @@ def writestr(self): # Indexed baseline functions lines.append("## BaselineFunctions") for i, bf in enumerate(safebf): - lines.append('# BaselineFunction %s' %i) + lines.append("# BaselineFunction %s" % i) lines.append(bf.writestr(safebf)) # Indexed peak functions lines.append("## PeakFunctions") for i, pf in enumerate(safepf): - lines.append('# PeakFunction %s' %i) + lines.append("# PeakFunction %s" % i) lines.append(pf.writestr(safepf)) # Baseline @@ -546,7 +617,7 @@ def writestr(self): if self.baseline is None: lines.append("None") elif self.baseline in safebf: - lines.append('%s' %repr(safebf.index(self.baseline))) + lines.append("%s" % repr(safebf.index(self.baseline))) else: lines.append(self.baseline.writestr(safebf)) @@ -556,34 +627,34 @@ def writestr(self): lines.append("None") else: for ip in self.initial_peaks: - lines.append('# InitialPeak') + lines.append("# InitialPeak") lines.append(ip.writestr(safepf)) - lines.append('# SrMiseMetadata') + lines.append("# SrMiseMetadata") # Extractable peak types if self.pf is None: lines.append("pf=None") else: - lines.append("pf=%s" %repr([safepf.index(p) for p in self.pf])) + lines.append("pf=%s" % repr([safepf.index(p) for p in self.pf])) # Clustering resolution - lines.append('cres=%g' %self.cres) + lines.append("cres=%g" % self.cres) # Model evaluator if self.error_method is None: - lines.append('ModelEvaluator=None') + lines.append("ModelEvaluator=None") else: - lines.append('ModelEvaluator=%s' %self.error_method.__name__) + lines.append("ModelEvaluator=%s" % self.error_method.__name__) # Extraction range - lines.append("Range=%s" %repr(self.rng)) + lines.append("Range=%s" % repr(self.rng)) # Everything not defined by PeakExtraction - lines.append('# Metadata') + lines.append("# Metadata") lines.append(self.writemetadata()) # Raw data used in extraction. 
- lines.append('##### start data') - line = ['#L'] + lines.append("##### start data") + line = ["#L"] numlines = 0 if self.x is not None: line.append("x") @@ -604,29 +675,28 @@ def writestr(self): for i in range(numlines): line = [] if self.x is not None: - line.append("%g" %self.x[i]) + line.append("%g" % self.x[i]) if self.y is not None: - line.append("%g" %self.y[i]) + line.append("%g" % self.y[i]) if self.dx is not None: - line.append("%g" %self.dx[i]) + line.append("%g" % self.dx[i]) if self.dy is not None: - line.append("%g" %self.dy[i]) + line.append("%g" % self.dy[i]) if self.effective_dy is not None: - line.append("%g" %self.effective_dy[i]) + line.append("%g" % self.effective_dy[i]) lines.append(" ".join(line)) - - ### Calculated members - lines.append('##### Results') - lines.append('extraction_type=%s' %repr(self.extraction_type)) + # Calculated members + lines.append("##### Results") + lines.append("extraction_type=%s" % repr(self.extraction_type)) lines.append("### ModelCluster") if self.extracted is None: - lines.append('None') + lines.append("None") else: lines.append(self.extracted.writestr(pfbaselist=safepf, blfbaselist=safebf)) - datastring = "\n".join(lines)+"\n" + datastring = "\n".join(lines) + "\n" return datastring def writemetadata(self): @@ -647,7 +717,7 @@ def getrangeslice(self): while self.x[low_idx] < max(self.x[0], self.rng[0]): low_idx += 1 hi_idx = len(self.x) - while self.x[hi_idx-1] > min(self.x[-1], self.rng[1]): + while self.x[hi_idx - 1] > min(self.x[-1], self.rng[1]): hi_idx -= 1 return slice(low_idx, hi_idx) @@ -657,11 +727,16 @@ def estimate_peak(self, x, add=True): Peaks already extracted, if any, are taken into account. If none exist, use those specified by initial_peaks instead. - Parameters: - x: Coordinate of the point of interest - add: (True) Automatically add peak to extracted peaks or initial_peaks. - - Return a Peak object, or None if estimation fails. + Parameters + ---------- + x : array-like + The oordinate of the point of interest + add : bool + Automatically add peak to extracted peaks or initial_peaks. Default is True. + + Returns + ------- + The Peak object, or None if estimation fails. """ # Make sure all required extraction variables have some value self.defaultvars() @@ -679,14 +754,14 @@ def estimate_peak(self, x, add=True): dy = self.effective_dy[rangeslice] if x < x1[0] or x > x1[-1]: - emsg = "Argument x=%s outside allowed range (%s, %s)." %(x, x1[0], x1[-1]) + emsg = "Argument x=%s outside allowed range (%s, %s)." % (x, x1[0], x1[-1]) raise ValueError(emsg) # Object performing clustering on data. Note that DataClusters # provides an iterator that clusters the next point and returns # itself. Thus, dclusters and step (below) refer to the same object. - dclusters = DataClusters(x1, y1, self.cres) # Cluster with baseline removed + dclusters = DataClusters(x1, y1, self.cres) # Cluster with baseline removed dclusters.makeclusters() cidx = dclusters.find_nearest_cluster2(x)[0] cslice = dclusters.cut(cidx) @@ -700,10 +775,10 @@ def estimate_peak(self, x, add=True): if len(mcluster.model) > 0: if add: - logger.info("Adding peak: %s" %mcluster.model[0]) + logger.info("Adding peak: %s" % mcluster.model[0]) self.add_peaks(mcluster.model) else: - logger.info("Found peak: %s" %mcluster.model[0]) + logger.info("Found peak: %s" % mcluster.model[0]) return mcluster.model[0] else: logger.info("No peaks found.") @@ -713,7 +788,10 @@ def add_peaks(self, peaks): """Add peaks to extracted peaks, or initial_peaks if no extracted peaks exist. 
Parameters - peaks: A Peaks instance""" + ---------- + peaks: Peaks object + The peaks instance + """ if self.extracted is not None: self.extracted.replacepeaks(peaks) else: @@ -725,12 +803,12 @@ def add_peaks(self, peaks): def extract_single(self, recursion_depth=1): """Find ModelCluster with peaks extracted from data. Return ModelCovariance instance at top level. - Every extracted peak is one of the peak functions supplied. All - comparisons of different peak models are performed with the class - specified by error_method. + Every extracted peak is one of the peak functions supplied. All + comparisons of different peak models are performed with the class + specified by error_method. - Parameters - recursion_depth: (1) Tracks recursion with extract_single.""" + Parameters + recursion_depth: (1) Tracks recursion with extract_single.""" self.clearcalc() tracer = srmiselog.tracer tracer.pushc() @@ -755,7 +833,7 @@ def extract_single(self, recursion_depth=1): # provides an iterator that clusters the next point and returns # itself. Thus, dclusters and step (below) refer to the same object. - dclusters = DataClusters(x, y, self.cres) # Cluster with baseline removed + dclusters = DataClusters(x, y, self.cres) # Cluster with baseline removed # The data for model clusters includes the baseline y = self.y[rangeslice] - ip.value(x) @@ -770,32 +848,37 @@ def extract_single(self, recursion_depth=1): stepcounter = 0 - ############################ - ### Main extraction loop ### + # ######################### + # Main extraction loop ### for step in dclusters: stepcounter += 1 msg = "\n\n------ Recursion: %s Step: %s Cluster: %s %s ------" - logger.debug(msg, - recursion_depth, - stepcounter, - step.lastcluster_idx, - step.clusters[step.lastcluster_idx] - ) + logger.debug( + msg, + recursion_depth, + stepcounter, + step.lastcluster_idx, + step.clusters[step.lastcluster_idx], + ) # Update mclusters if len(step.clusters) > len(mclusters): # Add a new cluster - mclusters.insert(step.lastcluster_idx, - ModelCluster(None, - bl, - x, - y, - dy, - step.cut(step.lastcluster_idx), - self.error_method, - self.pf)) + mclusters.insert( + step.lastcluster_idx, + ModelCluster( + None, + bl, + x, + y, + dy, + step.cut(step.lastcluster_idx), + self.error_method, + self.pf, + ), + ) else: # Update an existing cluster mclusters[step.lastcluster_idx].change_slice(step.cut(step.lastcluster_idx)) @@ -812,8 +895,8 @@ def extract_single(self, recursion_depth=1): # three clusters can become adjacent at any given step. assert len(adjacent) <= 3 - ### Update cluster fits ### - #1. Refit clusters adjacent to at least one other cluster. + # Update cluster fits ### + # 1. Refit clusters adjacent to at least one other cluster. for a in adjacent: mclusters[a].fit(justify=True) @@ -842,7 +925,7 @@ def extract_single(self, recursion_depth=1): # enlarged cluster ("new_cluster") or an intermediate cluster # ("adj_cluster"). 
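# --- Editor's sketch (not part of this diff): the neighborhood logic below
# pivots on numpy's searchsorted, which reports how many sorted peak positions
# lie to the left of the cluster border. A toy example with made-up values:
import numpy as np

peak_pos = np.array([1.0, 2.0, 3.5])  # sorted peak positions (hypothetical)
pivot = peak_pos.searchsorted(2.6)    # -> 2: the peaks at 1.0 and 2.0 lie left of the border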
- if step.lastpoint_idx == 0 or step.lastpoint_idx == len(step.x)-1: + if step.lastpoint_idx == 0 or step.lastpoint_idx == len(step.x) - 1: logger.debug("Boundary full: %s", step.lastpoint_idx) full_cluster = ModelCluster(mclusters[step.lastcluster_idx]) full_cluster.fit(True) @@ -853,7 +936,7 @@ def extract_single(self, recursion_depth=1): # Determine neighborhood appropriate for fitting (no larger than combined clusters) if len(full_cluster.model) > 0: - peak_pos = np.array([p['position'] for p in full_cluster.model]) + peak_pos = np.array([p["position"] for p in full_cluster.model]) pivot = peak_pos.searchsorted(border_x) else: peak_pos = np.array([]) @@ -864,111 +947,140 @@ def extract_single(self, recursion_depth=1): # left_data, right_data: indices defining the extent of the "interpeak range" for x, etc. near_peaks = np.array([], dtype=np.int) - # interpeak range goes from peak to peak of next nearest peaks, although their contributions to the data are still removed. + # interpeak range goes from peak to peak of next nearest peaks, although their contributions + # to the data are still removed. if pivot == 0: # No peaks left of border_x! left_data = full_cluster.slice.indices(len(x))[0] elif pivot == 1: # One peak left left_data = full_cluster.slice.indices(len(x))[0] - near_peaks = np.append(near_peaks, pivot-1) + near_peaks = np.append(near_peaks, pivot - 1) else: # left_data -> one more peak to the left - left_data = max(0, x.searchsorted(peak_pos[pivot-2])-1) - near_peaks = np.append(near_peaks, pivot-1) + left_data = max(0, x.searchsorted(peak_pos[pivot - 2]) - 1) + near_peaks = np.append(near_peaks, pivot - 1) if pivot == len(peak_pos): # No peaks right of border_x! - right_data = full_cluster.slice.indices(len(x))[1]-1 - elif pivot == len(peak_pos)-1: + right_data = full_cluster.slice.indices(len(x))[1] - 1 + elif pivot == len(peak_pos) - 1: # One peak right - right_data = full_cluster.slice.indices(len(x))[1]-1 + right_data = full_cluster.slice.indices(len(x))[1] - 1 near_peaks = np.append(near_peaks, pivot) else: # right_data -> one more peak to the right - right_data = min(len(x), x.searchsorted(peak_pos[pivot+1])+1) + right_data = min(len(x), x.searchsorted(peak_pos[pivot + 1]) + 1) near_peaks = np.append(near_peaks, pivot) - other_peaks = np.concatenate([np.arange(0, pivot-1), np.arange(pivot+1, len(peak_pos))]) + other_peaks = np.concatenate([np.arange(0, pivot - 1), np.arange(pivot + 1, len(peak_pos))]) # Go from indices to lists of peaks. near_peaks = Peaks([full_cluster.model[i] for i in near_peaks]) other_peaks = Peaks([full_cluster.model[i] for i in other_peaks]) - ### Remove contribution of peaks outside neighborhood + # Remove contribution of peaks outside neighborhood # Define range of fitting/recursion to the interpeak range # The adjusted error is passed unchanged. This may introduce # a few more peaks than is justified, but they can be pruned # with the correct statistics at the top level of recursion. 
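# --- Editor's sketch (not part of this diff): the recursion below always fits
# a residual. Contributions of peaks outside the neighborhood are subtracted
# from the data first, so the recursive search only sees signal the current
# model leaves unexplained. A toy residual with made-up Gaussians:
import numpy as np

x = np.linspace(0.0, 10.0, 101)
data = np.exp(-0.5 * ((x - 3.0) / 0.2) ** 2) + np.exp(-0.5 * ((x - 7.0) / 0.2) ** 2)
outside = np.exp(-0.5 * ((x - 7.0) / 0.2) ** 2)  # stands in for other_peaks.value(x)
residual = data - outside                        # what the recursive search receives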
- adj_slice = slice(left_data, right_data+1) + adj_slice = slice(left_data, right_data + 1) adj_x = x[adj_slice] - adj_y = y[adj_slice]-other_peaks.value(adj_x) + adj_y = y[adj_slice] - other_peaks.value(adj_x) adj_error = dy[adj_slice] - adj_cluster = ModelCluster(near_peaks, bl, adj_x, adj_y, adj_error, slice(len(adj_x)), self.error_method, self.pf) + adj_cluster = ModelCluster( + near_peaks, + bl, + adj_x, + adj_y, + adj_error, + slice(len(adj_x)), + self.error_method, + self.pf, + ) # Recursively cluster/fit the residual rec_r = adj_x - rec_y = adj_y-near_peaks.value(rec_r) + rec_y = adj_y - near_peaks.value(rec_r) rec_error = adj_error # Quick check to see if there is anything to find min_npars = min([p.npars for p in self.pf]) - checkrec = ModelCluster(None, None, rec_r, rec_y, rec_error, None, self.error_method, self.pf) + checkrec = ModelCluster( + None, + None, + rec_r, + rec_y, + rec_error, + None, + self.error_method, + self.pf, + ) recurse = len(near_peaks) > 0 and checkrec.quality().growth_justified(checkrec, min_npars) if recurse and recursion_depth < 3: - logger.info("\n*********STARTING RECURSION level %s (full boundary)************" %(recursion_depth+1)) + logger.info( + "\n*********STARTING RECURSION level %s (full boundary)************" + % (recursion_depth + 1) + ) rec_search = PeakExtraction() rec_search.setdata(rec_r, rec_y, None, rec_error) - rec_search.setvars(quiet=True, baseline=bl, cres=self.cres, pf=self.pf, error_method=self.error_method) - rec_search.extract_single(recursion_depth+1) + rec_search.setvars( + quiet=True, + baseline=bl, + cres=self.cres, + pf=self.pf, + error_method=self.error_method, + ) + rec_search.extract_single(recursion_depth + 1) rec = rec_search.extracted - logger.info("*********ENDING RECURSION level %s (full boundary) ************\n" %(recursion_depth+1)) + logger.info( + "*********ENDING RECURSION level %s (full boundary) ************\n" % (recursion_depth + 1) + ) # Incorporate best peaks from recursive search. adj_cluster.augment(rec) - ### Select which model to use + # Select which model to use full_cluster.model = other_peaks full_cluster.replacepeaks(adj_cluster.model) full_cluster.fit(True) - msg = ["---Result of full boundary---", - "Original cluster:", - "%s", - "Final cluster:", - "%s", - "---End of combining clusters---"] - logger.debug("\n".join(msg), - mclusters[step.lastcluster_idx], - full_cluster) + msg = [ + "---Result of full boundary---", + "Original cluster:", + "%s", + "Final cluster:", + "%s", + "---End of combining clusters---", + ] + logger.debug("\n".join(msg), mclusters[step.lastcluster_idx], full_cluster) mclusters[step.lastcluster_idx] = full_cluster - ### End update cluster fits ### + # End update cluster fits ### - ### Combine adjacent clusters ### + # Combine adjacent clusters ### # Iterate in reverse order to preserve earlier indices for idx in adjacent[-1:0:-1]: msg = ["Current model"] msg.extend(["%s" for m in mclusters]) - logger.debug("\n".join(msg), - *[m.model for m in mclusters]) + logger.debug("\n".join(msg), *[m.model for m in mclusters]) - cleft = step.clusters[idx-1] + cleft = step.clusters[idx - 1] cright = step.clusters[idx] - new_cluster = ModelCluster.join_adjacent(mclusters[idx-1], mclusters[idx]) + new_cluster = ModelCluster.join_adjacent(mclusters[idx - 1], mclusters[idx]) # Estimate coordinate where clusters combine. 
- border_x = .5*(x[cleft[1]]+x[cright[0]]) - border_y = .5*(y[cleft[1]]+y[cright[0]]) + border_x = 0.5 * (x[cleft[1]] + x[cright[0]]) + border_y = 0.5 * (y[cleft[1]] + y[cright[0]]) # Determine neighborhood appropriate for fitting (no larger than combined clusters) if len(new_cluster.model) > 0: - peak_pos = np.array([p['position'] for p in new_cluster.model]) + peak_pos = np.array([p["position"] for p in new_cluster.model]) pivot = peak_pos.searchsorted(border_x) else: peak_pos = np.array([]) @@ -979,53 +1091,72 @@ def extract_single(self, recursion_depth=1): # left_data, right_data: indices defining the extent of the "interpeak range" for x, etc. near_peaks = np.array([], dtype=np.int) - # interpeak range goes from peak to peak of next nearest peaks, although their contributions to the data are still removed. + # interpeak range goes from peak to peak of next nearest peaks, although their contributions + # to the data are still removed. if pivot == 0: # No peaks left of border_x! - left_data=new_cluster.slice.indices(len(x))[0] + left_data = new_cluster.slice.indices(len(x))[0] elif pivot == 1: # One peak left left_data = new_cluster.slice.indices(len(x))[0] - near_peaks = np.append(near_peaks, pivot-1) + near_peaks = np.append(near_peaks, pivot - 1) else: # left_data -> one more peak to the left - left_data = max(0,x.searchsorted(peak_pos[pivot-2])-1) - near_peaks = np.append(near_peaks, pivot-1) + left_data = max(0, x.searchsorted(peak_pos[pivot - 2]) - 1) + near_peaks = np.append(near_peaks, pivot - 1) if pivot == len(peak_pos): # No peaks right of border_x! - right_data = new_cluster.slice.indices(len(x))[1]-1 - elif pivot == len(peak_pos)-1: + right_data = new_cluster.slice.indices(len(x))[1] - 1 + elif pivot == len(peak_pos) - 1: # One peak right - right_data = new_cluster.slice.indices(len(x))[1]-1 + right_data = new_cluster.slice.indices(len(x))[1] - 1 near_peaks = np.append(near_peaks, pivot) else: # right_data -> one more peak to the right - right_data = min(len(x), x.searchsorted(peak_pos[pivot+1])+1) + right_data = min(len(x), x.searchsorted(peak_pos[pivot + 1]) + 1) near_peaks = np.append(near_peaks, pivot) - other_peaks = np.concatenate([np.arange(0, pivot-1), np.arange(pivot+1, len(peak_pos))]) + other_peaks = np.concatenate([np.arange(0, pivot - 1), np.arange(pivot + 1, len(peak_pos))]) # Go from indices to lists of peaks. near_peaks = Peaks([new_cluster.model[i] for i in near_peaks]) other_peaks = Peaks([new_cluster.model[i] for i in other_peaks]) - ### Remove contribution of peaks outside neighborhood + # Remove contribution of peaks outside neighborhood # Define range of fitting/recursion to the interpeak range # The adjusted error is passed unchanged. This may introduce # a few more peaks than is justified, but they can be pruned # with the correct statistics at the top level of recursion. - adj_slice = slice(left_data, right_data+1) + adj_slice = slice(left_data, right_data + 1) adj_x = x[adj_slice] - adj_y = y[adj_slice]-other_peaks.value(adj_x) + adj_y = y[adj_slice] - other_peaks.value(adj_x) adj_error = dy[adj_slice] - #### Perform recursion on a version that is scaled at the + # # Perform recursion on a version that is scaled at the # border, as well as on that is simply fit beforehand. In # many cases these lead to nearly identical results, but # occasionally one works much better than the other. 
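# --- Editor's sketch (not part of this diff): both candidate clusters built
# below are fit and scored, and the better one is kept. The scoring is
# delegated to the configured ModelEvaluator; a generic AIC-style score
# (an illustrative stand-in, not the library's implementation) looks like:
import numpy as np

def aic_like_score(residual, dy, npars):
    # chi-squared misfit plus a 2-per-parameter complexity penalty;
    # lower is better, so extra peaks must earn their keep.
    return np.sum((residual / dy) ** 2) + 2 * npars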
- adj_cluster1 = ModelCluster(near_peaks.copy(), bl, adj_x, adj_y, adj_error, slice(len(adj_x)), self.error_method, self.pf) - adj_cluster2 = ModelCluster(near_peaks.copy(), bl, adj_x, adj_y, adj_error, slice(len(adj_x)), self.error_method, self.pf) + adj_cluster1 = ModelCluster( + near_peaks.copy(), + bl, + adj_x, + adj_y, + adj_error, + slice(len(adj_x)), + self.error_method, + self.pf, + ) + adj_cluster2 = ModelCluster( + near_peaks.copy(), + bl, + adj_x, + adj_y, + adj_error, + slice(len(adj_x)), + self.error_method, + self.pf, + ) # Adjust cluster at border if there is at least one peak on # either side. @@ -1034,23 +1165,44 @@ def extract_single(self, recursion_depth=1): # Recursively cluster/fit the residual rec_r1 = adj_x - #rec_y1 = adj_y - near_peaks.value(rec_r1) + # rec_y1 = adj_y - near_peaks.value(rec_r1) rec_y1 = adj_y - adj_cluster1.model.value(rec_r1) rec_error1 = adj_error # Quick check to see if there is anything to find min_npars = min([p.npars for p in self.pf]) - checkrec = ModelCluster(None, None, rec_r1, rec_y1, rec_error1, None, self.error_method, self.pf) + checkrec = ModelCluster( + None, + None, + rec_r1, + rec_y1, + rec_error1, + None, + self.error_method, + self.pf, + ) recurse1 = checkrec.quality().growth_justified(checkrec, min_npars) if recurse1 and recursion_depth < 3: - logger.info("\n*********STARTING RECURSION level %s (reduce at border)************" %(recursion_depth+1)) + logger.info( + "\n*********STARTING RECURSION level %s (reduce at border)************" + % (recursion_depth + 1) + ) rec_search1 = PeakExtraction() rec_search1.setdata(rec_r1, rec_y1, None, rec_error1) - rec_search1.setvars(quiet=True, baseline=bl, cres=self.cres, pf=self.pf, error_method=self.error_method) - rec_search1.extract_single(recursion_depth+1) + rec_search1.setvars( + quiet=True, + baseline=bl, + cres=self.cres, + pf=self.pf, + error_method=self.error_method, + ) + rec_search1.extract_single(recursion_depth + 1) rec1 = rec_search1.extracted - logger.info("*********ENDING RECURSION level %s (reduce at border) ************\n" %(recursion_depth+1)) + logger.info( + "*********ENDING RECURSION level %s (reduce at border) ************\n" + % (recursion_depth + 1) + ) # Incorporate best peaks from recursive search. 
adj_cluster1.augment(rec1) @@ -1060,28 +1212,47 @@ def extract_single(self, recursion_depth=1): # Recursively cluster/fit the residual rec_r2 = adj_x - #rec_y2 = adj_y - near_peaks.value(rec_r2) + # rec_y2 = adj_y - near_peaks.value(rec_r2) rec_y2 = adj_y - adj_cluster2.model.value(rec_r2) rec_error2 = adj_error # Quick check to see if there is anything to find min_npars = min([p.npars for p in self.pf]) - checkrec = ModelCluster(None, None, rec_r2, rec_y2, rec_error2, None, self.error_method, self.pf) + checkrec = ModelCluster( + None, + None, + rec_r2, + rec_y2, + rec_error2, + None, + self.error_method, + self.pf, + ) recurse2 = len(near_peaks) > 0 and checkrec.quality().growth_justified(checkrec, min_npars) if recurse2 and recursion_depth < 3: - logger.info("\n*********STARTING RECURSION level %s (prefit)************" %(recursion_depth+1)) + logger.info( + "\n*********STARTING RECURSION level %s (prefit)************" % (recursion_depth + 1) + ) rec_search2 = PeakExtraction() rec_search2.setdata(rec_r2, rec_y2, None, rec_error2) - rec_search2.setvars(quiet=True, baseline=bl, cres=self.cres, pf=self.pf, error_method=self.error_method) - rec_search2.extract_single(recursion_depth+1) + rec_search2.setvars( + quiet=True, + baseline=bl, + cres=self.cres, + pf=self.pf, + error_method=self.error_method, + ) + rec_search2.extract_single(recursion_depth + 1) rec2 = rec_search2.extracted - logger.info("*********ENDING RECURSION level %s (prefit) ************\n" %(recursion_depth+1)) + logger.info( + "*********ENDING RECURSION level %s (prefit) ************\n" % (recursion_depth + 1) + ) # Incorporate best peaks from recursive search. adj_cluster2.augment(rec2) - ### Select which model to use + # Select which model to use new_cluster.model = other_peaks rej_cluster = ModelCluster(new_cluster) q1 = adj_cluster1.quality(self.error_method) @@ -1095,25 +1266,23 @@ def extract_single(self, recursion_depth=1): new_cluster.fit(True) + msg = [ + "---Result of combining clusters---", + "First cluster:", + "%s", + "Second cluster:", + "%s", + "Resulting cluster:", + "%s", + "---End of combining clusters---", + ] - msg = ["---Result of combining clusters---", - "First cluster:", - "%s", - "Second cluster:", - "%s", - "Resulting cluster:", - "%s", - "---End of combining clusters---"] - - logger.debug("\n".join(msg), - mclusters[idx-1], - mclusters[idx], - new_cluster) + logger.debug("\n".join(msg), mclusters[idx - 1], mclusters[idx], new_cluster) - mclusters[idx-1] = new_cluster + mclusters[idx - 1] = new_cluster del mclusters[idx] - ### End combine adjacent clusters loop ### + # End combine adjacent clusters loop ### # Finally, combine clusters in dclusters if len(adjacent) > 0: @@ -1121,9 +1290,8 @@ def extract_single(self, recursion_depth=1): tracer.emit(*mclusters) - - ### End main extraction loop ### - ################################ + # End main extraction loop ### + # ############################# # Put initial peaks back in mclusters[0].addexternalpeaks(ip) @@ -1166,13 +1334,20 @@ def fit_single(self): dy = self.effective_dy[rngslice] # Set up ModelCluster - ext = ModelCluster(self.initial_peaks, self.baseline, x, y, dy, None, - self.error_method, self.pf) + ext = ModelCluster( + self.initial_peaks, + self.baseline, + x, + y, + dy, + None, + self.error_method, + self.pf, + ) # Fit model with baseline and calculate covariance matrix cov = ModelCovariance() - ext.fit(fitbaseline=True, estimate=False, cov=cov, - cov_format="default_output") + ext.fit(fitbaseline=True, estimate=False, cov=cov, 
cov_format="default_output") # Update calculated instance variables self.extraction_type = "fit_single" @@ -1180,28 +1355,28 @@ def fit_single(self): return cov -#end PeakExtraction class + +# end PeakExtraction class # simple test code -if __name__ == '__main__': +if __name__ == "__main__": from numpy.random import randn - from diffpy.srmise import srmiselog - from diffpy.srmise.modelevaluators import AICc - from diffpy.srmise.peaks import GaussianOverR + from diffpy.srmise.modelevaluators.aicc import AICc + from diffpy.srmise.peaks.gaussianoverr import GaussianOverR srmiselog.setlevel("info") srmiselog.liveplotting(False) - pf = GaussianOverR(.7) - res = .01 + pf = GaussianOverR(0.7) + res = 0.01 - pars = [[3, .2, 10], [3.5, .2, 10]] + pars = [[3, 0.2, 10], [3.5, 0.2, 10]] ideal_peaks = Peaks([pf.actualize(p, "pwa") for p in pars]) - r = np.arange(2,4,res) + r = np.arange(2, 4, res) y = ideal_peaks.value(r) + randn(len(r)) err = np.ones(len(r)) @@ -1209,14 +1384,14 @@ def fit_single(self): te = PeakExtraction() te.setdata(r, y, None, err) - te.setvars(rng=[1.51,10.], pf=[pf], cres=.1, effective_dy = 1.5*err) + te.setvars(rng=[1.51, 10.0], pf=[pf], cres=0.1, effective_dy=1.5 * err) te.extract_single() - print "--- Actual Peak parameters ---" - print ideal_peaks + print("--- Actual Peak parameters ---") + print(ideal_peaks) - print "\n--- After extraction ---" - print te + print("\n--- After extraction ---") + print(te) te.plot() - raw_input() + input() diff --git a/diffpy/srmise/baselines/__init__.py b/src/diffpy/srmise/peaks/__init__.py similarity index 61% rename from diffpy/srmise/baselines/__init__.py rename to src/diffpy/srmise/peaks/__init__.py index fc8ad98..5540acb 100644 --- a/diffpy/srmise/baselines/__init__.py +++ b/src/diffpy/srmise/peaks/__init__.py @@ -2,7 +2,8 @@ ############################################################################## # # SrMise by Luke Granlund -# (c) 2014 trustees of the Michigan State University. +# (c) 2014 trustees of the Michigan State University +# (c) 2024 trustees of Columia University in the City of New York # All rights reserved. # # File coded by: Luke Granlund @@ -10,11 +11,3 @@ # See LICENSE.txt for license information. # ############################################################################## - -__all__ = ["base", "arbitrary", "fromsequence", "nanospherical", "polynomial"] - -from arbitrary import Arbitrary -from base import Baseline -from fromsequence import FromSequence -from nanospherical import NanoSpherical -from polynomial import Polynomial diff --git a/diffpy/srmise/peaks/base.py b/src/diffpy/srmise/peaks/base.py similarity index 84% rename from diffpy/srmise/peaks/base.py rename to src/diffpy/srmise/peaks/base.py index d8e3469..44c7963 100644 --- a/diffpy/srmise/peaks/base.py +++ b/src/diffpy/srmise/peaks/base.py @@ -2,7 +2,8 @@ ############################################################################## # # SrMise by Luke Granlund -# (c) 2014 trustees of the Michigan State University. +# (c) 2014 trustees of the Michigan State University +# (c) 2024 trustees of Columbia University in the City of New York # All rights reserved. 
# # File coded by: Luke Granlund @@ -15,13 +16,13 @@ import numpy as np -import diffpy.srmise.srmiselog from diffpy.srmise.basefunction import BaseFunction from diffpy.srmise.modelparts import ModelPart, ModelParts -from diffpy.srmise.srmiseerrors import * +from diffpy.srmise.srmiseerrors import SrMiseDataFormatError, SrMiseScalingError logger = logging.getLogger("diffpy.srmise") + class PeakFunction(BaseFunction): """Base class for functions which represent peaks. @@ -60,7 +61,15 @@ class PeakFunction(BaseFunction): transform_parameters() """ - def __init__(self, parameterdict, parformats, default_formats, metadict, base=None, Cache=None): + def __init__( + self, + parameterdict, + parformats, + default_formats, + metadict, + base=None, + Cache=None, + ): """Set parameterdict defined by subclass parameterdict: A dictionary mapping string keys to their index in a @@ -82,24 +91,31 @@ def __init__(self, parameterdict, parformats, default_formats, metadict, base=No raise ValueError(emsg) BaseFunction.__init__(self, parameterdict, parformats, default_formats, metadict, base, Cache) - - #### "Virtual" class methods #### + # # "Virtual" class methods #### def scale_at(self, peak, x, scale): emsg = "scale_at must be implemented in a PeakFunction subclass." raise NotImplementedError(emsg) + # # Methods required by BaseFunction #### - #### Methods required by BaseFunction #### - - def actualize(self, pars, in_format="default_input", free=None, removable=True, static_owner=False): + def actualize( + self, + pars, + in_format="default_input", + free=None, + removable=True, + static_owner=False, + ): converted = self.transform_parameters(pars, in_format, out_format="internal") return Peak(self, converted, free, removable, static_owner) def getmodule(self): return __name__ -#end of class PeakFunction + +# end of class PeakFunction + class Peaks(ModelParts): """A collection for Peak objects.""" @@ -110,12 +126,12 @@ def __init__(self, *args, **kwds): def argsort(self, key="position"): """Return sequence of indices which sort peaks in order specified by key.""" - keypars=np.array([p[key] for p in self]) + keypars = np.array([p[key] for p in self]) # In normal use the peaks will already be sorted, so check for it. - sorted=True - for i in range(len(keypars)-1): - if keypars[i] > keypars[i+1]: - sorted=False + sorted = True + for i in range(len(keypars) - 1): + if keypars[i] > keypars[i + 1]: + sorted = False break if not sorted: return keypars.argsort().tolist() @@ -142,14 +158,14 @@ def match_at(self, x, y): orig = self.copy() try: - scale = y/height + scale = y / height # First attempt at scaling peaks. Record which peaks, if any, # were not scaled in case a second attempt is required. scaled = [] all_scaled = True any_scaled = False - fixed_height = 0. + fixed_height = 0.0 for peak in self: scaled.append(peak.scale_at(x, scale)) all_scaled = all_scaled and scaled[-1] @@ -161,27 +177,29 @@ def match_at(self, x, y): if not all_scaled and fixed_height < y and fixed_height < height: self[:] = orig[:] any_scaled = False - scale = (y - fixed_height)/(height - fixed_height) + scale = (y - fixed_height) / (height - fixed_height) for peak, s in (self, scaled): if s: # "or" is short-circuited, so scale_at() must be first # to guarantee it is called. 
any_scaled = peak.scale_at(x, scale) or any_scaled - except Exception, e: + except Exception as e: logger.debug("An exception prevented matching -- %s", e) self[:] = orig[:] return False return any_scaled - def sort(self, key="position"): + def sort(self, reverse=False, key="position"): """Sort peaks in order specified by key.""" - keypars=np.array([p[key] for p in self]) + keypars = np.array([p[key] for p in self]) order = keypars.argsort() self[:] = [self[idx] for idx in order] return + # End of class Peaks + class Peak(ModelPart): """Represents a single peak associated with a PeakFunction subclass.""" @@ -225,7 +243,7 @@ def scale_at(self, x, scale): try: adj_pars = self._owner.scale_at(self.pars, x, scale) - except SrMiseScalingError, err: + except SrMiseScalingError as err: logger.debug("Cannot scale peak:", err) return False @@ -244,22 +262,21 @@ def factory(peakstr, ownerlist): peakstr: string representing peak ownerlist: List of BaseFunctions that owner is in """ - from numpy import array data = peakstr.strip().splitlines() # dictionary of parameters pdict = {} for d in data: - l = d.split("=", 1) - if len(l) == 2: + parse_value = d.split("=", 1) + if len(parse_value) == 2: try: - pdict[l[0]] = eval(l[1]) + pdict[parse_value[0]] = eval(parse_value[1]) except Exception: - emsg = ("Invalid parameter: %s" %d) + emsg = "Invalid parameter: %s" % d raise SrMiseDataFormatError(emsg) else: - emsg = ("Invalid parameter: %s" %d) + emsg = "Invalid parameter: %s" % d raise SrMiseDataFormatError(emsg) # Correctly initialize the base function, if one exists. @@ -271,38 +288,39 @@ def factory(peakstr, ownerlist): return Peak(**pdict) + # End of class Peak # simple test code -if __name__ == '__main__': +if __name__ == "__main__": import matplotlib.pyplot as plt from numpy.random import randn from diffpy.srmise.modelcluster import ModelCluster - from diffpy.srmise.modelevaluators import AICc - from diffpy.srmise.peaks import GaussianOverR + from diffpy.srmise.modelevaluators.aicc import AICc + from diffpy.srmise.peaks.gaussianoverr import GaussianOverR - res = .01 - r = np.arange(2,4,res) - err = np.ones(len(r)) #default unknown errors - pf = GaussianOverR(.7) + res = 0.01 + r = np.arange(2, 4, res) + err = np.ones(len(r)) # default unknown errors + pf = GaussianOverR(0.7) evaluator = AICc() - pars = [[3, .2, 10], [3.5, .2, 10]] + pars = [[3, 0.2, 10], [3.5, 0.2, 10]] ideal_peaks = Peaks([pf.actualize(p, "pwa") for p in pars]) - y = ideal_peaks.value(r) + .1*randn(len(r)) + y = ideal_peaks.value(r) + 0.1 * randn(len(r)) - guesspars = [[2.7, .15, 5], [3.7, .3, 5]] + guesspars = [[2.7, 0.15, 5], [3.7, 0.3, 5]] guess_peaks = Peaks([pf.actualize(p, "pwa") for p in guesspars]) cluster = ModelCluster(guess_peaks, r, y, err, None, AICc, [pf]) qual1 = cluster.quality() - print qual1.stat + print(qual1.stat) cluster.fit() yfit = cluster.calc() qual2 = cluster.quality() - print qual2.stat + print(qual2.stat) plt.figure(1) plt.plot(r, y, r, yfit) diff --git a/src/diffpy/srmise/peaks/gaussian.py b/src/diffpy/srmise/peaks/gaussian.py new file mode 100644 index 0000000..b0112b0 --- /dev/null +++ b/src/diffpy/srmise/peaks/gaussian.py @@ -0,0 +1,442 @@ +#!/usr/bin/env python +############################################################################## +# +# SrMise by Luke Granlund +# (c) 2014 trustees of the Michigan State University +# (c) 2024 trustees of Columbia University in the City of New York +# All rights reserved. +# +# File coded by: Luke Granlund +# +# See LICENSE.txt for license information. 
+# +############################################################################## + +import logging + +import numpy as np + +from diffpy.srmise.peaks.base import PeakFunction +from diffpy.srmise.srmiseerrors import SrMiseEstimationError, SrMiseScalingError, SrMiseTransformationError + +logger = logging.getLogger("diffpy.srmise") + + +class Gaussian(PeakFunction): + """Methods for evaluation and parameter estimation of width-limited Gaussian. + + Allowed formats are + internal: [position, parameterized width-squared, area] + pwa: [position, full width at half maximum, area] + mu_sigma_area: [mu, sigma, area] + + The internal parameterization is unconstrained, but are interpreted + so that the width is between 0 and a user-provided maximum full width + at half maximum, and the area is positive. + + Note that all full width at half maximum values are for the + corresponding Gaussian. + """ + + # Possibly implement cutoff later, but low priority. + # cutoff=3/np.sqrt(2*np.log(2)) + # cutoff defines a distance = maxwidth*cutoff from the maximum beyond + # which the function is considered 0. By default this distance is + # equivalent to 3 standard deviations. + def __init__(self, maxwidth, Cache=None): + """maxwidth defined as full width at half maximum for the + corresponding Gaussian, which is physically relevant.""" + parameterdict = {"position": 0, "width": 1, "area": 2} + formats = ["internal", "pwa", "mu_sigma_area"] + default_formats = {"default_input": "internal", "default_output": "pwa"} + metadict = {} + metadict["maxwidth"] = (maxwidth, repr) + PeakFunction.__init__(self, parameterdict, formats, default_formats, metadict, None, Cache) + + if maxwidth <= 0: + emsg = "'maxwidth' must be greater than 0." + raise ValueError(emsg) + self.maxwidth = maxwidth + + # Useful constants ### + # c1 and c2 help with function values + self.c1 = self.maxwidth * np.sqrt(np.pi / (8 * np.log(2))) + self.c2 = self.maxwidth**2 / (8 * np.log(2)) + + # c3 and c4 help with parameter estimation + self.c3 = 0.5 * np.sqrt(np.pi / np.log(2)) + self.c4 = np.pi / (self.maxwidth * 2) + + # convert sigma to fwhm: fwhm = 2 sqrt(2 log 2) sigma + self.sigma2fwhm = 2 * np.sqrt(2 * np.log(2)) + + return + + # Methods required by PeakFunction #### + + def estimate_parameters(self, r, y): + """Estimate parameters for single peak from data provided. + + Parameters + ---------- + r : array-like + The data along r from which to estimate + y : array-like + The data along y from which to estimate + + Returns + ------- + array-like + Numpy array of parameters in the default internal format. + Raises SrMiseEstimationError if parameters cannot be estimated for any + reason. + """ + if len(r) != len(y): + emsg = "Arrays r, y must have equal length." + raise SrMiseEstimationError(emsg) + + logger.debug("Estimate peak using %s point(s)", len(r)) + + minpoints_required = 3 + + # filter out negative points + usable_idx = [i for i in range(len(y)) if y[i] > 0] + use_r = r[usable_idx] + use_y = y[usable_idx] + + if len(usable_idx) < minpoints_required: + emsg = "Not enough data for successful estimation." 
+ raise SrMiseEstimationError(emsg) + + # Estimation #### + guesspars = np.array([0.0, 0.0, 0.0], dtype=float) + min_y = use_y.min() + max_y = use_y.max() + + if min_y != max_y: + weights = (use_y - min_y) ** 2 + guesspars[0] = np.sum(use_r * weights) / sum(weights) + # guesspars[0] = center + if use_y[0] < max_y: + sigma_left = np.sqrt(-0.5 * (use_r[0] - guesspars[0]) ** 2 / np.log(use_y[0] / max_y)) + else: + sigma_left = np.sqrt( + -0.5 + * np.mean(np.abs(np.array([use_r[0] - guesspars[0], use_r[-1] - guesspars[0]]))) ** 2 + / np.log(min_y / max_y) + ) + if use_y[-1] < max_y: + sigma_right = np.sqrt(-0.5 * (use_r[-1] - guesspars[0]) ** 2 / np.log(use_y[-1] / max_y)) + else: + sigma_right = np.sqrt( + -0.5 + * np.mean(np.abs(np.array([use_r[0] - guesspars[0], use_r[-1] - guesspars[0]]))) ** 2 + / np.log(min_y / max_y) + ) + guesspars[1] = 0.5 * (sigma_right + sigma_left) * self.sigma2fwhm + else: + # Proceed cautiously if min_y == max_y. Without other information + # we choose the center of the cluster as the peak center, and make + # sure the peak has died down by the time it reaches the edge of + # the data. + guesspars[0] = (use_r[0] + use_r[-1]) / 2 + guesspars[1] = (use_r[-1] - use_r[0]) * 2 / (2 * np.log(2)) # cluster width/2=2*sigma + + if guesspars[1] > self.maxwidth: + # account for width-limit + guesspars[2] = self.c3 * max_y * self.maxwidth + guesspars[1] = np.pi / 2 # parameterized in terms of sin + else: + guesspars[2] = self.c3 * max_y * guesspars[1] + guesspars[1] = np.arcsin( + 2 * guesspars[1] ** 2 / self.maxwidth**2 - 1.0 + ) # parameterized in terms of sin + + return guesspars + + def scale_at(self, pars, x, scale): + """Change parameters so value(x)->scale*value(x). + + Does not change position or height of peak's maxima. Raises + SrMiseScalingError if the parameters cannot be scaled. + + Parameters + ---------- + pars : array-like + The parameters corresponding to a single peak + x : float + The position of the border + scale : float + The size of scaling at x. Must be positive. + + Returns + ------- + array-like + The scaled parameters in the internal format.""" + if scale <= 0: + emsg = "".join(["Cannot scale by ", str(scale), "."]) + raise SrMiseScalingError(emsg) + + if scale == 1: + return pars + else: + ratio = 1 / scale # Ugly: Equations orig. solved in terms of ratio + + tpars = self.transform_parameters(pars, in_format="internal", out_format="mu_sigma_area") + + # solves 1. f(rmax;mu1,sigma1,area1)=f(rmax;mu2,sigma2,area2) + # 2. f(x;mu1,sigma1,area1)=ratio*f(x;mu1,sigma2,area2) + # 3. mu1=mu2=rmax (the maximum of a Gaussian occurs at r=mu) + # for mu2, sigma2, area2 (with appropriate unit conversions to fwhm at the end). + # The expression for rmax is the appropriate solution to df/dr=0 + mu1, sigma1, area1 = tpars + + # the semi-nasty algebra reduces to something nice + mu2 = mu1 + area2 = np.sqrt(area1**2 / (2 * np.log(ratio) * sigma1**2 / (x - mu1) ** 2 + 1)) + sigma2 = sigma1 * area2 / area1 + + tpars[0] = mu2 + tpars[1] = sigma2 + tpars[2] = area2 + try: + tpars = self.transform_parameters(tpars, in_format="mu_sigma_area", out_format="internal") + except SrMiseTransformationError as err: + raise SrMiseScalingError(str(err)) + return tpars + + def _jacobianraw(self, pars, r, free): + """Compute the Jacobian of a width-limited Gaussian function. + + This method calculates the partial derivatives of a Gaussian function + with respect to its parameters, considering a limiting width.
The Gaussian's + width approaches its maximum FWHM (maxwidth) as the effective width parameter + (`pars[1]`) tends to infinity. + + Parameters + ---------- + pars : array-like + The sequence of parameters defining a single width-limited Gaussian: + - pars[0]: Peak position. + - pars[1]: Effective width, which scales up to the full width at half maximum (fwhm=maxwidth) as + `pars[1]` approaches infinity. It is mathematically represented as `tan(pi/2 * fwhm / maxwidth)`. + - pars[2]: Multiplicative constant 'a', equivalent to the peak area. + + r : array-like or scalar + The sequence or scalar over which the Gaussian parameters `pars` are evaluated. + + free : array-like of bools + Determines which derivatives need to be computed. A `True` value indicates that the derivative + with respect to the corresponding parameter in `pars` should be calculated; + `False` indicates no evaluation is needed. + + Returns + ------- + jacobian : ndarray + The Jacobian matrix, where each column corresponds to the derivative of the Gaussian function + with respect to one of the input parameters `pars`, evaluated at points `r`. + Only columns corresponding to `True` values in `free` are computed. + """ + jacobian = [None, None, None] + if np.sum(np.logical_not(free)) == self.npars: + return jacobian + + # Optimization + sin_p = np.sin(pars[1]) + 1.0 + p0minusr = pars[0] - r + exp_p = np.exp(-((p0minusr) ** 2) / (self.c2 * sin_p)) / (self.c1 * np.sqrt(sin_p)) + + if free[0]: + # derivative with respect to peak position + jacobian[0] = -2.0 * exp_p * p0minusr * np.abs(pars[2]) / (self.c2 * sin_p) + if free[1]: + # derivative with respect to reparameterized peak width + jacobian[1] = ( + -exp_p + * np.abs(pars[2]) + * np.cos(pars[1]) + * (self.c2 * sin_p - 2 * p0minusr**2) + / (2.0 * self.c2 * sin_p**2) + ) + if free[2]: + # derivative with respect to peak area + # abs'(x)=sign(x) for real x except at 0 where it is undetermined. Since any real peak necessarily has + # non-zero area and the function is parameterized such that values of either sign represent equivalent + # curves I arbitrarily choose positive sign for pars[2]==0 in order to + # push the system back into a realistic parameter space should this improbable scenario occur. + # jacobian[2] = sign(pars[2])*exp_p + if pars[2] >= 0: + jacobian[2] = exp_p + else: + jacobian[2] = -exp_p + return jacobian + + def _transform_parametersraw(self, pars, in_format, out_format): + """Convert parameter values from one format to another. + + This method also facilitates restoring parameters to a preferred range if the + target format allows for multiple representations of the same physical result. + + Parameters + ---------- + pars : array_like + The sequence of parameters in the `in_format`. + in_format : str, optional + The input format of the parameters. Supported formats are: + - 'internal': [position, parameterized width-squared, area] + - 'pwa': [position, full width at half maximum, area] + - 'mu_sigma_area': [mu, sigma, area] + Default is 'internal'. + out_format : str, optional + The desired output format of the parameters. Same options as `in_format`. + Default is 'pwa'. + + Returns + ------- + array_like + The transformed parameters in the `out_format`. + """ + temp = np.array(pars) + + # Do I need to change anything? The internal parameters may need to be + # placed into the preferred range, even though their interpretation does + # not change.
+ if in_format == out_format and in_format != "internal": + return pars + + # Convert to intermediate format "internal" + if in_format == "internal": + # put the parameter for width in the "physical" quadrant [-pi/2,pi/2], + # where .5*(sin(p)+1) covers fwhm = [0, maxwidth] + n = np.floor((temp[1] + np.pi / 2) / np.pi) + if np.mod(n, 2) == 0: + temp[1] = temp[1] - np.pi * n + else: + temp[1] = np.pi * n - temp[1] + temp[2] = np.abs(temp[2]) # map negative area to equivalent positive one + elif in_format == "pwa": + if temp[1] > self.maxwidth: + emsg = "Width %s (FWHM) greater than maximum allowed width %s" % ( + temp[1], + self.maxwidth, + ) + raise SrMiseTransformationError(emsg) + temp[1] = np.arcsin(2.0 * temp[1] ** 2 / self.maxwidth**2 - 1.0) + elif in_format == "mu_sigma_area": + fwhm = temp[1] * self.sigma2fwhm + if fwhm > self.maxwidth: + emsg = "Width %s (FWHM) greater than maximum allowed width %s" % ( + fwhm, + self.maxwidth, + ) + raise SrMiseTransformationError(emsg) + temp[1] = np.arcsin(2.0 * fwhm**2 / self.maxwidth**2 - 1.0) + else: + raise ValueError("Argument 'in_format' must be one of %s." % self.parformats) + + # Convert to specified output format from "internal" format. + if out_format == "internal": + pass + elif out_format == "pwa": + temp[1] = np.sqrt(0.5 * (np.sin(temp[1]) + 1.0) * self.maxwidth**2) + elif out_format == "mu_sigma_area": + temp[1] = np.sqrt(0.5 * (np.sin(temp[1]) + 1.0) * self.maxwidth**2) / self.sigma2fwhm + else: + raise ValueError("Argument 'out_format' must be one of %s." % self.parformats) + return temp + + def _valueraw(self, pars, r): + """Compute the value of a width-limited Gaussian for the specified parameters at given radial distances. + + This function calculates the value of a Gaussian distribution, where its effective width is constrained and + related to the maxwidth. As `pars[1]` approaches infinity, + the effective width reaches `FWHM` (maxwidth). The returned values represent the Gaussian's intensity + across the provided radial coordinates `r`. + + Parameters + ---------- + pars : array_like + A sequence of parameters defining the Gaussian shape: + - pars[0]: Peak position of the Gaussian. + - pars[1]: Effective width factor, approaching infinity implies the FWHM equals `maxwidth`. + It is related to the FWHM by `tan(pi/2*FWHM/maxwidth)`. + - pars[2]: Multiplicative constant 'a', equivalent to the peak area of the Gaussian when integrated. + + r : array_like or float + The radial distances or a single value at which the Gaussian is to be evaluated. + + Returns + ------- + float + The value of a width-limited Gaussian for the specified parameters at given radial distances. + """ + return ( + np.abs(pars[2]) + / (self.c1 * np.sqrt(np.sin(pars[1]) + 1.0)) + * np.exp(-((r - pars[0]) ** 2) / (self.c2 * (np.sin(pars[1]) + 1.0))) + ) + + def getmodule(self): + return __name__ + + # Other methods #### + + def max(self, pars): + """Return position and height of the peak maximum. + Parameters + ---------- + pars : array_like + A sequence of parameters defining the Gaussian shape. + + Returns + ------- + array_like + The position and height of the peak maximum.""" + # TODO: Reconsider this behavior + if len(pars) == 0: + return None + + # Transform parameters for convenience. 
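[Illustrative aside, editor's sketch, not part of the patch: the `internal` width parameter p used above maps to a physical FWHM via fwhm = maxwidth * sqrt((sin p + 1)/2), so any real p gives a width in [0, maxwidth]. A round-trip sketch of that mapping, assuming only numpy:]

    import numpy as np

    maxwidth = 0.7

    def fwhm_to_internal(fwhm):
        # inverse of fwhm = maxwidth * sqrt(0.5 * (sin(p) + 1)); requires 0 <= fwhm <= maxwidth
        return np.arcsin(2.0 * fwhm**2 / maxwidth**2 - 1.0)

    def internal_to_fwhm(p):
        return np.sqrt(0.5 * (np.sin(p) + 1.0) * maxwidth**2)

    w = 0.2
    assert np.isclose(internal_to_fwhm(fwhm_to_internal(w)), w)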
+ tpars = self.transform_parameters(pars, in_format="internal", out_format="mu_sigma_area") + + rmax = tpars[0] + ymax = self._valueraw(pars, rmax) + return np.array([rmax, ymax]) + + +# end of class Gaussian + +# simple test code +if __name__ == "__main__": + + import matplotlib.pyplot as plt + from numpy.random import randn + + from diffpy.srmise.modelcluster import ModelCluster + from diffpy.srmise.modelevaluators.aicc import AICc + from diffpy.srmise.peaks.base import Peaks + + res = 0.01 + r = np.arange(2, 4, res) + err = np.ones(len(r)) # default unknown errors + pf = Gaussian(0.7) + evaluator = AICc() + + pars = [[3, 0.2, 10], [3.5, 0.2, 10]] + ideal_peaks = Peaks([pf.actualize(p, "pwa") for p in pars]) + y = ideal_peaks.value(r) + 0.1 * randn(len(r)) + + guesspars = [[2.7, 0.15, 5], [3.7, 0.3, 5]] + guess_peaks = Peaks([pf.actualize(p, "pwa") for p in guesspars]) + cluster = ModelCluster(guess_peaks, r, y, err, None, AICc, [pf]) + + qual1 = cluster.quality() + print(qual1.stat) + cluster.fit() + yfit = cluster.calc() + qual2 = cluster.quality() + print(qual2.stat) + + plt.figure(1) + plt.plot(r, y, r, yfit) + plt.show() diff --git a/src/diffpy/srmise/peaks/gaussianoverr.py b/src/diffpy/srmise/peaks/gaussianoverr.py new file mode 100644 index 0000000..2cf9d65 --- /dev/null +++ b/src/diffpy/srmise/peaks/gaussianoverr.py @@ -0,0 +1,514 @@ +#!/usr/bin/env python +############################################################################## +# +# SrMise by Luke Granlund +# (c) 2014 trustees of the Michigan State University +# (c) 2024 trustees of Columbia University in the City of New York +# All rights reserved. +# +# File coded by: Luke Granlund +# +# See LICENSE.txt for license information. +# +############################################################################## + +import logging + +import numpy as np + +from diffpy.srmise.peaks.base import PeakFunction +from diffpy.srmise.srmiseerrors import SrMiseEstimationError, SrMiseScalingError, SrMiseTransformationError + +logger = logging.getLogger("diffpy.srmise") + + +class GaussianOverR(PeakFunction): + """Methods for evaluation and parameter estimation of width-limited Gaussian/r. + + Allowed formats are + internal: [position, parameterized width-squared, area] + pwa: [position, full width at half maximum, area] + mu_sigma_area: [mu, sigma, area] + + The internal parameterization is unconstrained, but are interpreted + so that the width is between 0 and a user-provided maximum full width + at half maximum, and the area is positive. + + Note that all full width at half maximum values are for the + corresponding Gaussian. + """ + + # Possibly implement cutoff later, but low priority. + # cutoff=3/np.sqrt(2*np.log(2)) + # cutoff defines a distance = maxwidth*cutoff from the maximum beyond + # which the function is considered 0. By default this distance is + # equivalent to 3 standard deviations. + def __init__(self, maxwidth, Cache=None): + """maxwidth defined as full width at half maximum for the + corresponding Gaussian, which is physically relevant.""" + parameterdict = {"position": 0, "width": 1, "area": 2} + formats = ["internal", "pwa", "mu_sigma_area"] + default_formats = {"default_input": "internal", "default_output": "pwa"} + metadict = {} + metadict["maxwidth"] = (maxwidth, repr) + PeakFunction.__init__(self, parameterdict, formats, default_formats, metadict, None, Cache) + + if maxwidth <= 0: + emsg = "'maxwidth' must be greater than 0." 
+ raise ValueError(emsg) + self.maxwidth = maxwidth + + # Useful constants ### + # c1 and c2 help with function values + self.c1 = self.maxwidth * np.sqrt(np.pi / (8 * np.log(2))) + self.c2 = self.maxwidth**2 / (8 * np.log(2)) + + # c3 and c4 help with parameter estimation + self.c3 = 0.5 * np.sqrt(np.pi / np.log(2)) + self.c4 = np.pi / (self.maxwidth * 2) + + # convert sigma to fwhm: fwhm = 2 sqrt(2 log 2) sigma + self.sigma2fwhm = 2 * np.sqrt(2 * np.log(2)) + + return + + # Methods required by PeakFunction #### + + def estimate_parameters(self, r, y): + """Estimate parameters for single peak from data provided. + + Parameters + ---------- + r : array-like + Data along r from which to estimate + y : array-like + Data along y from which to estimate + + Returns + ------- + array-like + Numpy array of parameters in the default internal format. + Raises SrMiseEstimationError if parameters cannot be estimated for any + reason. + """ + if len(r) != len(y): + emsg = "Arrays r, y must have equal length." + raise SrMiseEstimationError(emsg) + + logger.debug("Estimate peak using %s point(s)", len(r)) + + minpoints_required = 3 + + # filter out negative points + usable_idx = [i for i in range(len(y)) if y[i] > 0] + use_r = r[usable_idx] + use_y = y[usable_idx] + + if len(usable_idx) < minpoints_required: + emsg = "Not enough data for successful estimation." + raise SrMiseEstimationError(emsg) + + # Estimation #### + guesspars = np.array([0.0, 0.0, 0.0], dtype=float) + min_y = use_y.min() + max_y = use_y.max() + + if min_y != max_y: + weights = (use_y - min_y) ** 2 + guesspars[0] = np.sum(use_r * weights) / sum(weights) + # guesspars[0] = center + if use_y[0] < max_y: + sigma_left = np.sqrt(-0.5 * (use_r[0] - guesspars[0]) ** 2 / np.log(use_y[0] / max_y)) + else: + sigma_left = np.sqrt( + -0.5 + * np.mean(np.abs(np.array([use_r[0] - guesspars[0], use_r[-1] - guesspars[0]]))) ** 2 + / np.log(min_y / max_y) + ) + if use_y[-1] < max_y: + sigma_right = np.sqrt(-0.5 * (use_r[-1] - guesspars[0]) ** 2 / np.log(use_y[-1] / max_y)) + else: + sigma_right = np.sqrt( + -0.5 + * np.mean(np.abs(np.array([use_r[0] - guesspars[0], use_r[-1] - guesspars[0]]))) ** 2 + / np.log(min_y / max_y) + ) + guesspars[1] = 0.5 * (sigma_right + sigma_left) * self.sigma2fwhm + else: + # Proceed cautiously if min_y == max_y. Without other information + # we choose the center of the cluster as the peak center, and make + # sure the peak has died down by the time it reaches the edge of + # the data. + guesspars[0] = (use_r[0] + use_r[-1]) / 2 + guesspars[1] = (use_r[-1] - use_r[0]) * 2 / (2 * np.log(2)) # cluster width/2=2*sigma + + if guesspars[1] > self.maxwidth: + # account for width-limit + guesspars[2] = self.c3 * max_y * guesspars[0] * self.maxwidth + guesspars[1] = np.pi / 2 # parameterized in terms of sin + else: + guesspars[2] = self.c3 * max_y * guesspars[0] * guesspars[1] + guesspars[1] = np.arcsin( + 2 * guesspars[1] ** 2 / self.maxwidth**2 - 1.0 + ) # parameterized in terms of sin + + return guesspars + + def scale_at(self, pars, x, scale): + """Change parameters so value(x)->scale*value(x). + + Does not change position or height of peak's maxima. Raises + SrMiseScalingError if the parameters cannot be scaled. + + Parameters + ---------- + pars : array-like + Parameters corresponding to a single peak + x : float + Position of the border + scale : float + Size of scaling at x. Must be positive. + + Returns + ------- + array-like + The sequence of scaled parameters.
+ """ + if scale <= 0: + emsg = "".join(["Cannot scale by ", str(scale), "."]) + raise SrMiseScalingError(emsg) + + if scale == 1: + return pars + else: + ratio = 1 / scale # Ugly: Equations orig. solved in terms of ratio + + tpars = self.transform_parameters(pars, in_format="internal", out_format="mu_sigma_area") + + # solves 1. f(rmax;mu1,sigma1,area1)=f(rmax;mu2,sigma2,area2) + # 2. f(x;mu1,sigma1,area1)=ratio*f(x;mu1,sigma2,area2) + # 3. 1/2*(mu1+sqrt(mu1^2+sigma1^2))=1/2*(mu2+sqrt(mu2^2+sigma2^2))=rmax + # for mu2, sigma2, area2 (with appropriate unit conversions to fwhm at the end). + # The expression for rmax is the appropriate solution to df/dr=0 + mu1, sigma1, area1 = tpars + + # position of the peak maximum + try: + rmax = self.max(pars)[0] + except ValueError as err: + raise SrMiseScalingError(str(err)) + + # lhs of eqn1/eqn2 multiplied by ratio. Then take the log. + log_ratio_prime = np.log(ratio) + (x - rmax) * (x - 2 * mu1 + rmax) / (2 * sigma1**2) + + # the semi-nasty algebra reduces to something nice + sigma2 = np.sqrt(0.5 * rmax * (x - rmax) ** 2 / (x - rmax + rmax * log_ratio_prime)) + mu2 = (sigma2**2 + rmax**2) / rmax + area2 = ( + area1 + * (sigma2 / sigma1) + * np.exp(-((rmax - mu1) ** 2) / (2 * sigma1**2)) + / np.exp(-((rmax - mu2) ** 2) / (2 * sigma2**2)) + ) + + tpars[0] = mu2 + tpars[1] = sigma2 + tpars[2] = area2 + try: + tpars = self.transform_parameters(tpars, in_format="mu_sigma_area", out_format="internal") + except SrMiseTransformationError as err: + raise SrMiseScalingError(str(err)) + return tpars + + def _jacobianraw(self, pars, r, free): + """ + Compute the Jacobian of a width-limited Gaussian/r function. + + This method calculates the partial derivatives of a Gaussian/r function + with respect to its parameters, considering a limiting width. The Gaussian/r's + width approaches its maximum FWHM (maxwidth) as the effective width parameter + (`pars[1]`) tends to infinity. + + Parameters + ---------- + pars : array-like + Sequence of parameters defining a single width-limited Gaussian: + - pars[0]: Peak position. + - pars[1]: Effective width, which scales up to the full width at half maximum (fwhm=maxwidth) as + `pars[1]` approaches infinity. It is mathematically represented as `tan(pi/2 * fwhm / maxwidth)`. + - pars[2]: Multiplicative constant 'a', equivalent to the peak area. + r : array-like or scalar + The sequence or scalar over which the Gaussian parameters `pars` are evaluated. + free : array-like of bools + Determines which derivatives need to be computed. A `True` value indicates that the derivative + with respect to the corresponding parameter in `pars` should be calculated; + `False` indicates no evaluation is needed. + Returns + ------- + jacobian : ndarray + The Jacobian matrix, where each column corresponds to the derivative of the Gaussian/r function + with respect to one of the input parameters `pars`, evaluated at points `r`. + Only columns corresponding to `True` values in `free` are computed. 
+ """ + jacobian = [None, None, None] + if np.sum(np.logical_not(free)) == self.npars: + return jacobian + + # Optimization + sin_p = np.sin(pars[1]) + 1.0 + p0minusr = pars[0] - r + exp_p = np.exp(-((p0minusr) ** 2) / (self.c2 * sin_p)) / (np.abs(r) * self.c1 * np.sqrt(sin_p)) + + if free[0]: + # derivative with respect to peak position + jacobian[0] = -2.0 * exp_p * p0minusr * np.abs(pars[2]) / (self.c2 * sin_p) + if free[1]: + # derivative with respect to reparameterized peak width + jacobian[1] = ( + -exp_p + * np.abs(pars[2]) + * np.cos(pars[1]) + * (self.c2 * sin_p - 2 * p0minusr**2) + / (2.0 * self.c2 * sin_p**2) + ) + if free[2]: + # derivative with respect to peak area + # abs'(x)=sign(x) for real x except at 0 where it is undetermined. Since any real peak necessarily has + # non-zero area and the function is paramaterized such that values of either sign represent equivalent + # curves I arbitrarily choose positive sign for pars[2]==0 in order to + # push the system back into a realistic parameter space should this improbable scenario occur. + # jacobian[2] = sign(pars[2])*exp_p + if pars[2] >= 0: + jacobian[2] = exp_p + else: + jacobian[2] = -exp_p + return jacobian + + def _transform_derivativesraw(self, pars, in_format, out_format): + """Return gradient matrix for the pars converted from in_format to out_format. + + Parameters + pars: Sequence of parameters + in_format: A format defined for this class + out_format: A format defined for this class + + Defined Formats + internal: [position, parameterized width-squared, area] + pwa: [position, full width at half maximum, area] + mu_sigma_area: [mu, sigma, area] + """ + # With these three formats only the width-related parameter changes. + # Therefore the gradient matrix is the identity matrix with the possible + # exception of the element at [1,1]. + g = np.identity(self.npars) + + if in_format == out_format: + return + + if in_format == "internal": + if out_format == "pwa": + g[1, 1] = self.maxwidth / (2 * np.sqrt(2)) * np.cos(pars[1]) / np.sqrt(1 + np.sin(pars[1])) + elif out_format == "mu_sigma_area": + g[1, 1] = ( + self.maxwidth + / (2 * np.sqrt(2) * self.sigma2fwhm) + * np.cos(pars[1]) + / np.sqrt(1 + np.sin(pars[1])) + ) + else: + raise ValueError("Argument 'out_format' must be one of %s." % self.parformats) + elif in_format == "pwa": + if out_format == "internal": + g[1, 1] = 2 / np.sqrt(self.maxwidth**2 - pars[1] ** 2) + elif out_format == "mu_sigma_area": + g[1, 1] = 1 / self.sigma2fwhm + else: + raise ValueError("Argument 'out_format' must be one of %s." % self.parformats) + elif in_format == "mu_sigma_area": + if out_format == "internal": + g[1, 1] = 2 * self.sigma2fwhm / np.sqrt(self.maxwidth**2 - (self.sigma2fwhm * pars[1]) ** 2) + elif out_format == "pwa": + g[1, 1] = self.sigma2fwhm + else: + raise ValueError("Argument 'out_format' must be one of %s." % self.parformats) + else: + raise ValueError("Argument 'in_format' must be one of %s." % self.parformats) + + return g + + def _transform_parametersraw(self, pars, in_format, out_format): + """Convert parameter values from in_format to out_format. + + This method convert parameter values from one format to another and optionally restore + them to a preferred range if the target format supports multiple values + representing the same physical result. + + Parameters + ---------- + pars : array_like + Sequence of parameters in the `in_format`. + in_format : str, optional + The input format of the parameters. 
Supported formats are: + - 'internal': [position, parameterized width-squared, area] + - 'pwa': [position, full width at half maximum, area] + - 'mu_sigma_area': [mu, sigma, area] + Default is 'internal'. + out_format : str, optional + The desired output format of the parameters. Same options as `in_format`. + Default is 'pwa'. + + Returns + ------- + array_like + The transformed parameters in the `out_format`. + """ + temp = np.array(pars) + + # Do I need to change anything? The internal parameters may need to be + # placed into the preferred range, even though their interpretation does + # not change. + if in_format == out_format and in_format != "internal": + return pars + + # Convert to intermediate format "internal" + if in_format == "internal": + # put the parameter for width in the "physical" quadrant [-pi/2,pi/2], + # where .5*(sin(p)+1) covers fwhm = [0, maxwidth] + n = np.floor((temp[1] + np.pi / 2) / np.pi) + if np.mod(n, 2) == 0: + temp[1] = temp[1] - np.pi * n + else: + temp[1] = np.pi * n - temp[1] + temp[2] = np.abs(temp[2]) # map negative area to equivalent positive one + elif in_format == "pwa": + if temp[1] > self.maxwidth: + emsg = "Width %s (FWHM) greater than maximum allowed width %s" % ( + temp[1], + self.maxwidth, + ) + raise SrMiseTransformationError(emsg) + temp[1] = np.arcsin(2.0 * temp[1] ** 2 / self.maxwidth**2 - 1.0) + elif in_format == "mu_sigma_area": + fwhm = temp[1] * self.sigma2fwhm + if fwhm > self.maxwidth: + emsg = "Width %s (FWHM) greater than maximum allowed width %s" % ( + fwhm, + self.maxwidth, + ) + raise SrMiseTransformationError(emsg) + temp[1] = np.arcsin(2.0 * fwhm**2 / self.maxwidth**2 - 1.0) + else: + raise ValueError("Argument 'in_format' must be one of %s." % self.parformats) + + # Convert to specified output format from "internal" format. + if out_format == "internal": + pass + elif out_format == "pwa": + temp[1] = np.sqrt(0.5 * (np.sin(temp[1]) + 1.0) * self.maxwidth**2) + elif out_format == "mu_sigma_area": + temp[1] = np.sqrt(0.5 * (np.sin(temp[1]) + 1.0) * self.maxwidth**2) / self.sigma2fwhm + else: + raise ValueError("Argument 'out_format' must be one of %s." % self.parformats) + return temp + + def _valueraw(self, pars, r): + """Compute the value of a width-limited Gaussian/r for the specified parameters at given radial distances. + + This function calculates the value of a Gaussian/r distribution, + where its effective width is constrained and related to the maxwidth. As `pars[1]` approaches infinity, + the effective width reaches `FWHM` (maxwidth). The returned values represent the Gaussian's intensity + across the provided radial coordinates `r`. + + Parameters + ---------- + pars : array_like + A sequence of parameters defining the Gaussian shape: + - pars[0]: Peak position of the Gaussian. + - pars[1]: Effective width factor, approaching infinity implies the FWHM equals `maxwidth`. + It is related to the FWHM by `tan(pi/2*FWHM/maxwidth)`. + - pars[2]: Multiplicative constant 'a', equivalent to the peak area of the Gaussian when integrated. + r : array_like or float + Radial distances or a single value at which the Gaussian is to be evaluated. + Returns + ------- + float + The value of a width-limited Gaussian for the specified parameters at given radial distances. 
+ """ + return ( + np.abs(pars[2]) + / (np.abs(r) * self.c1 * np.sqrt(np.sin(pars[1]) + 1.0)) + * np.exp(-((r - pars[0]) ** 2) / (self.c2 * (np.sin(pars[1]) + 1.0))) + ) + + def getmodule(self): + return __name__ + + # Other methods #### + + def max(self, pars): + """Return position and height of the peak maximum. + + Parameters + ---------- + pars : array_like + The sequence of parameters defining the Gaussian shape. + + Returns + ------- + array-like + The sequence of position and height of the peak maximum.""" + # TODO: Reconsider this behavior + if len(pars) == 0: + return None + + # Transform parameters for convenience. + tpars = self.transform_parameters(pars, in_format="internal", out_format="mu_sigma_area") + + # The Gaussian/r only has a local maximum under this condition. + # Physically realistic peaks will always meet this condition, but + # trying to fit a signal down to r=0 could conceivably lead to issues. + if tpars[0] ** 2 <= 4 * tpars[1] ** 2: + emsg = "".join(["No local maximum with parameters\n", str(pars)]) + raise ValueError(emsg) + + rmax = 0.5 * (tpars[0] + np.sqrt(tpars[0] ** 2 - 4 * tpars[1] ** 2)) + ymax = self._valueraw(pars, rmax) + return np.array([rmax, ymax]) + + +# end of class GaussianOverR + +# simple test code +if __name__ == "__main__": + + import matplotlib.pyplot as plt + from numpy.random import randn + + from diffpy.srmise.modelcluster import ModelCluster + from diffpy.srmise.modelevaluators.aicc import AICc + from diffpy.srmise.peaks.base import Peaks + + res = 0.01 + r = np.arange(2, 4, res) + err = np.ones(len(r)) # default unknown errors + pf = GaussianOverR(0.7) + evaluator = AICc() + + pars = [[3, 0.2, 10], [3.5, 0.2, 10]] + ideal_peaks = Peaks([pf.actualize(p, "pwa") for p in pars]) + y = ideal_peaks.value(r) + 0.1 * randn(len(r)) + + guesspars = [[2.7, 0.15, 5], [3.7, 0.3, 5]] + guess_peaks = Peaks([pf.actualize(p, "pwa") for p in guesspars]) + cluster = ModelCluster(guess_peaks, r, y, err, None, AICc, [pf]) + + qual1 = cluster.quality() + print(qual1.stat) + cluster.fit() + yfit = cluster.calc() + qual2 = cluster.quality() + print(qual2.stat) + + plt.figure(1) + plt.plot(r, y, r, yfit) + plt.show() diff --git a/src/diffpy/srmise/peaks/terminationripples.py b/src/diffpy/srmise/peaks/terminationripples.py new file mode 100644 index 0000000..c516c96 --- /dev/null +++ b/src/diffpy/srmise/peaks/terminationripples.py @@ -0,0 +1,412 @@ +#!/usr/bin/env python +############################################################################## +# +# SrMise by Luke Granlund +# (c) 2014 trustees of the Michigan State University +# (c) 2024 trustees of Columbia University in the City of New York +# All rights reserved. +# +# File coded by: Luke Granlund +# +# See LICENSE.txt for license information. +# +############################################################################## + +import logging + +import numpy as np +import scipy.fftpack as fp + +from diffpy.srmise.peaks.base import PeakFunction + +logger = logging.getLogger("diffpy.srmise") + + +class TerminationRipples(PeakFunction): + """Methods for evaluation and parameter estimation of a peak function with termination ripples.""" + + def __init__(self, base, qmax, extension=4.0, supersample=5.0, Cache=None): + """Peak function constructor which adds termination ripples to existing function. + + Unlike other peak functions, TerminationRipples can only be evaluated + over a uniform grid, or at a single value using an ad hoc uniform grid + defined by qmax, extension, and supersample. 
+ + Parameters + ---------- + base : PeakFunction instance + The PeakFunction instance subclass. + qmax : float + The cut-off frequency in reciprocal space. + extension : float + How many multiples of 2pi/qmax to extend calculations in + order to avoid edge effects. Default is 4.0. + supersample : float + Number intervals over 2pi/qmax when a natural interval + cannot be determined while extending calculations. Default is 5.0. + Cache : class + The class (not instance) which implements caching of PeakFunction + evaluations.""" + parameterdict = base.parameterdict + formats = base.parformats + default_formats = base.default_formats + self.base = base + self.qmax = qmax + self.extension = extension + self.supersample = supersample + metadict = {} + metadict["qmax"] = (qmax, repr) + metadict["extension"] = (extension, repr) + metadict["supersample"] = (supersample, repr) + PeakFunction.__init__(self, parameterdict, formats, default_formats, metadict, base, Cache) + return + + # Methods required by PeakFunction #### + + # TODO: A smart way to convert from the basefunctions estimate to an + # appropriate one when ripples are considered. This may not be necessary, + # though. + def estimate_parameters(self, r, y): + """Estimate parameters for single peak from data provided. + + Uses estimation routine provided by base peak function. + + Parameters + ---------- + r : array-like + Data along r from which to estimate + y : array-like + Data along y from which to estimate + + Returns + ------- + array-like + Numpy array of parameters in the default internal format. + Raises SrMiseEstimationError if parameters cannot be estimated for any + reason. + """ + return self.base.estimate_parameters(r, y) + + # TODO: Can this be implemented sanely for termination ripples? + def scale_at(self, pars, x, scale): + """Change parameters so value(x)->scale*value(x) for the base function. + + Does not change position or height of peak's maxima. Raises + SrMiseScalingError if the parameters cannot be scaled. + + Parameters + ---------- + pars : array-like + The parameters corresponding to a single peak + x : float + The position of the border + scale : float + The size of scaling at x. Must be positive. + + Returns + ------- + array-like + The numpy array of scaled parameters. + """ + return self.base.scale_at(pars, x, scale) + + def _jacobianraw(self, pars, r, free): + """Return Jacobian of base function with termination ripples. + + Parameters + ---------- + pars : array-like + The sequence of parameters for a single peak + r : array-like + The sequence or scalar over which pars is evaluated + free : array-like + The sequence of booleans which determines which derivatives are + needed. True for evaluation, False for no evaluation. + + Returns + ------- + array-like + The Jacobian matrix of base function with termination ripples. + """ + return self.base._jacobianraw(pars, r, free) + + def _transform_derivativesraw(self, pars, in_format, out_format): + """Return gradient matrix for the pars converted from in_format to out_format. + + Parameters + ---------- + pars : array-like + The sequence of parameters + in_format : str + The format defined for base peak function + out_format : str + The format defined for base peak function + + Returns + ------- + ndarray + The Jacobian matrix of base function with termination ripples with out_format. 
+ """ + return self.base._transform_derivativesraw(pars, in_format, out_format) + + def _transform_parametersraw(self, pars, in_format, out_format): + """Convert parameter values from in_format to out_format. + + Parameters + ---------- + pars : array-like + The sequence of parameters + in_format : str + The format defined for base peak function + out_format : str + The format defined for base peak function + + Returns + ------- + array-like + The sequence of parameter values with out_format. + """ + return self.base._transform_parametersraw(pars, in_format, out_format) + + def _valueraw(self, pars, r): + """Return value of base peak function for the given parameters and r values. + + pars : array-like + The sequence of parameters for a single peak + r : array-like or float + The sequence or scalar over which pars is evaluated + + Returns + ------- + float + The value of base peak function for the given parameters and r.""" + return self.base._valueraw(pars, r) + + # Overridden PeakFunction functions #### + # jacobian() and value() are not normally overridden by PeakFunction + # subclasses, but are here to minimize the effect of edge-effects while + # introducing termination ripples. + + def jacobian(self, peak, r, rng=None): + """Calculate (rippled) jacobian, possibly restricted by range. + + Parameters + ---------- + peak : PeakFunction instance + The Peak to be evaluated + r : array-like + The sequence or scalar over which peak is evaluated + rng : slice object + Optional slice object restricts which r-values are evaluated. + The output has same length as r, but unevaluated objects have + a default value of 0. If caching is enabled these may be + previously calculated values instead. Default is None + + Returns + ------- + jac : array-like + The Jacobian of base function with termination ripples.""" + if self is not peak._owner: + raise ValueError( + "Argument 'peak' must be evaluated by the " + "PeakFunction subclass instance with which " + "it is associated." + ) + + # normally r will be a sequence, but also allow single numeric values + try: + if len(r) > 1: + dr = (r[-1] - r[0]) / (len(r) - 1) + else: + # dr is ad hoc if r is a single point + dr = 2 * np.pi / (self.supersample * self.qmax) + + if rng is None: + rng = slice(0, len(r)) + rpart = r[rng] + (ext_r, ext_slice) = self.extend_grid(rpart, dr) + jac = self._jacobianraw(peak.pars, ext_r, peak.free) + output = [None for j in jac] + for idx in range(len(output)): + if jac[idx] is not None: + jac[idx] = self.cut_freq(jac[idx], dr) + output[idx] = r * 0.0 + output[idx][rng] = jac[idx][ext_slice] + return output + except TypeError: + # dr is ad hoc if r is a single point. + dr = 2 * np.pi / (self.supersample * self.qmax) + (ext_r, ext_slice) = self.extend_grid(np.array([r]), dr) + jac = self._jacobianraw(peak.pars, ext_r, peak.free) + for idx in range(len(output)): + if jac[idx] is not None: + jac[idx] = self.cut_freq(jac[idx], dr)[ext_slice][0] + return jac + + def value(self, peak, r, rng=None): + """Calculate (rippled) value of peak, possibly restricted by range. + + This function overrides its counterpart in PeakFunction in order + to minimize the impact of edge-effects from introducing termination + ripples into an existing peak function. + + Parameters + ---------- + peak : Peak instance + The Peak to be evaluated + r : array-like + The sequence or scalar over which peak is evaluated + rng : slice object + Optional slice object restricts which r-values are evaluated. 
+ The output has same length as r, but unevaluated objects have + a default value of 0. If caching is enabled these may be + previously calculated values instead. Default is None. + + Returns + ------- + output : array-like + The (rippled) value of peak, possibly restricted by range. + """ + if self is not peak._owner: + raise ValueError( + "Argument 'peak' must be evaluated by the " + "PeakFunction subclass instance with which " + "it is associated." + ) + + # normally r will be a sequence, but also allow single numeric values + + dr_super = 2 * np.pi / (self.supersample * self.qmax) + if np.isscalar(r): + # dr is ad hoc if r is a single point. + (ext_r, ext_slice) = self.extend_grid(np.array([r]), dr_super) + value = self._valueraw(peak.pars, ext_r) + value = self.cut_freq(value, dr_super) + return value[ext_slice][0] + else: + if rng is None: + rng = slice(0, len(r)) + + output = r * 0.0 + + # Make sure the actual dr used for finding termination ripples + # is at least as fine as dr_super, while still calculating the + # function at precisely the requested points. + # When the underlying function is sampled too coarsely it can + # miss critical high frequency components and return a very + # poor approximation to the continuous case. The actual fineness + # of sampling needed to avoid the worst of these discretization + # issues is difficult to determine without detailed knowledge + # of the underlying function. + dr = (r[-1] - r[0]) / (len(r) - 1) + segments = int(np.ceil(dr / dr_super)) + dr_segmented = dr / segments + + rpart = r[rng] + if segments > 1: + rpart = np.arange(rpart[0], rpart[-1] + dr_segmented / 2, dr_segmented) + + (ext_r, ext_slice) = self.extend_grid(rpart, dr_segmented) + value = self._valueraw(peak.pars, ext_r) + value = self.cut_freq(value, dr_segmented) + output[rng] = value[ext_slice][::segments] + + return output + + def getmodule(self): + return __name__ + + # Other methods #### + + def cut_freq(self, sequence, delta): + """Remove high-frequency components from sequence. + + This is equivalent to the discrete convolution of a signal with a sinc + function sin(2*pi*r/qmax)/r. + + Parameters + ---------- + sequence : array-like + The sequence to alter. + delta : float + The spacing between elements in sequence. + + Returns + ------- + array-like + The sequence with high-frequency components removed. + """ + padlen = int(2 ** np.ceil(np.log2(len(sequence)))) + padseq = fp.fft(sequence, padlen) + dq = 2 * np.pi / ((padlen - 1) * delta) + lowidx = int(np.ceil(self.qmax / dq)) + hiidx = padlen + 1 - lowidx + + # Remove hi-frequency components + padseq[lowidx:hiidx] = 0 + + padseq = fp.ifft(padseq) + return np.real(padseq[0 : len(sequence)]) + + def extend_grid(self, r, dr): + """Return (extended r, slice giving original range.
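[Illustrative aside, editor's sketch, not part of the patch: extend_grid() below pads the grid by extension*2*pi/qmax on each side so the low-pass filtering above does not pollute the requested range with edge effects. A standalone sketch of the padding logic, assuming only numpy; the helper name is hypothetical:]

    import numpy as np

    def extend(r, dr, ext):
        # pad by roughly ext on both sides, then remember where the original points sit
        left = np.arange(r[0] - dr, max(0.0, r[0] - ext - dr), -dr)[::-1]
        right = np.arange(r[-1] + dr, r[-1] + ext + dr, dr)
        ext_r = np.concatenate((left, r, right))
        return ext_r, slice(len(left), len(ext_r) - len(right))

    r = np.arange(2.0, 4.0, 0.01)
    ext_r, original = extend(r, 0.01, 4 * 2 * np.pi / 20.0)  # extension=4, qmax=20
    assert np.allclose(ext_r[original], r)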
+ + Parameters + ---------- + r : array-like or float + The sequence or scalar over which peak is evaluated + dr : array-like or float + The uncertainties over which peak is evaluated + + Returns + ------- + tuple + The extended r, slice giving original range.""" + ext = self.extension * 2 * np.pi / self.qmax + left_ext = np.arange(r[0] - dr, max(0.0, r[0] - ext - dr), -dr)[::-1] + right_ext = np.arange(r[-1] + dr, r[-1] + ext + dr, dr) + ext_r = np.concatenate((left_ext, r, right_ext)) + ext_slice = slice(len(left_ext), len(ext_r) - len(right_ext)) + return (ext_r, ext_slice) + + +# end of class TerminationRipples + +# simple test code +if __name__ == "__main__": + + import matplotlib.pyplot as plt + from numpy.random import randn + + from diffpy.srmise.modelcluster import ModelCluster + from diffpy.srmise.modelevaluators.aicc import AICc + from diffpy.srmise.peaks.base import Peaks + from diffpy.srmise.peaks.gaussianoverr import GaussianOverR + + res = 0.01 + r = np.arange(2, 4, res) + err = np.ones(len(r)) # default unknown errors + pf1 = GaussianOverR(0.7) + pf2 = TerminationRipples(pf1, 20.0) + evaluator = AICc() + + pars = [[3, 0.2, 10], [3.5, 0.2, 10]] + ideal_peaks = Peaks([pf1.actualize(p, "pwa") for p in pars]) + ripple_peaks = Peaks([pf2.actualize(p, "pwa") for p in pars]) + y_ideal = ideal_peaks.value(r) + y_ripple = ripple_peaks.value(r) + 0.1 * randn(len(r)) + + guesspars = [[2.7, 0.15, 5], [3.7, 0.3, 5]] + guess_peaks = Peaks([pf2.actualize(p, "pwa") for p in guesspars]) + cluster = ModelCluster(guess_peaks, r, y_ripple, err, None, AICc, [pf2]) + + qual1 = cluster.quality() + print(qual1.stat) + cluster.fit() + yfit = cluster.calc() + qual2 = cluster.quality() + print(qual2.stat) + + plt.figure(1) + plt.plot(r, y_ideal, r, y_ripple, r, yfit) + plt.show() diff --git a/diffpy/srmise/peakstability.py b/src/diffpy/srmise/peakstability.py similarity index 60% rename from diffpy/srmise/peakstability.py rename to src/diffpy/srmise/peakstability.py index 3897a2d..28c9517 100644 --- a/diffpy/srmise/peakstability.py +++ b/src/diffpy/srmise/peakstability.py @@ -2,7 +2,8 @@ ############################################################################## # # SrMise by Luke Granlund -# (c) 2014 trustees of the Michigan State University. +# (c) 2014 trustees of the Michigan State University +# (c) 2024 trustees of Columbia University in the City of New York # All rights reserved. # # File coded by: Luke Granlund @@ -14,7 +15,8 @@ import matplotlib.pyplot as plt import numpy as np -from diffpy.srmise import ModelCluster, PDFPeakExtraction +from diffpy.srmise.modelcluster import ModelCluster +from diffpy.srmise.pdfpeakextraction import PDFPeakExtraction # This is a total hack-job right now, and isn't suitable for @@ -27,7 +29,7 @@ class PeakStability: """Utility to test robustness of peaks.
results: [error scalar, model, bl, dr] - ppe: a PDFPeakExtraction instance """ + ppe: a PDFPeakExtraction instance""" def __init__(self): self.results = [] @@ -39,11 +41,11 @@ def setppe(self, ppe): def load(self, filename): try: - import cPickle as pickle - except: - import pickle + import cPickle as pickle + except ImportError: + import pickle - in_s = open(filename, 'rb') + in_s = open(filename, "rb") try: (self.results, ppestr) = pickle.load(in_s) self.ppe = PDFPeakExtraction() @@ -67,10 +69,10 @@ def load(self, filename): def save(self, filename): try: - import cPickle as pickle - except: - import pickle - out_s = open(filename, 'wb') + import cPickle as pickle + except ImportError: + import pickle + out_s = open(filename, "wb") try: # Write to the stream outstr = self.ppe.writestr() @@ -82,18 +84,23 @@ def save(self, filename): if r[2] is None: bldict = None else: - bldict = {"pars":r[2].pars, "free":r[2].free, "removable":r[2].removable, "static_owner":r[2].static_owner} + bldict = { + "pars": r[2].pars, + "free": r[2].free, + "removable": r[2].removable, + "static_owner": r[2].static_owner, + } results2.append([r[0], r[1], bldict, r[3]]) pickle.dump([results2, outstr], out_s) finally: out_s.close() - def plotseries(self, style='o', **kwds): + def plotseries(self, style="o", **kwds): plt.figure() plt.ioff() for e, r, bl, dr in self.results: peakpos = [p["position"] for p in r] - es = [e]*len(peakpos) + es = [e] * len(peakpos) plt.plot(peakpos, es, style, **kwds) plt.ion() plt.draw() @@ -103,30 +110,55 @@ def plot(self, **kwds): plt.clf() plt.plot(*self.ppe.extracted.plottable(), **kwds) q = self.ppe.extracted.quality() - plt.suptitle("[%i/%i]\n" - "Uncertainty: %6.3f. Peaks: %i.\n" - "Quality: %6.3f. Chi-square: %6.3f" - %(self.current+1, len(self.results), self.ppe.effective_dy[0], len(self.ppe.extracted.model), q.stat, q.chisq)) + plt.suptitle( + "[%i/%i]\n" + "Uncertainty: %6.3f. Peaks: %i.\n" + "Quality: %6.3f. Chi-square: %6.3f" + % ( + self.current + 1, + len(self.results), + self.ppe.effective_dy[0], + len(self.ppe.extracted.model), + q.stat, + q.chisq, + ) + ) def setcurrent(self, idx): - """Make the idxth model the active one.""" + """Make the idxth model the active one. + + Parameters + ---------- + idx : int + The index of the model to be tested. + + Returns + ------- + None + """ self.current = idx if idx is not None: result = self.results[idx] - self.ppe.setvars(quiet=True, effective_dy=result[0]*np.ones(len(self.ppe.x))) + self.ppe.setvars(quiet=True, effective_dy=result[0] * np.ones(len(self.ppe.x))) (r, y, dr, dy) = self.ppe.resampledata(result[3]) - self.ppe.extracted = ModelCluster(result[1], result[2], r, y, dy, None, self.ppe.error_method, self.ppe.pf) + self.ppe.extracted = ModelCluster( + result[1], result[2], r, y, dy, None, self.ppe.error_method, self.ppe.pf + ) else: self.ppe.clearcalc() def animate(self, results=None, step=False, **kwds): """Show animation of extracted peaks from first to last. 
- Parameters: step - Require keypress to show next plot results - The indices of results to show + Keywords passed to pyplot.plot() - Keywords passed to pyplot.plot()""" + Parameters + ---------- + step : bool + Require keypress to show next plot + results : array-like + The indices of results to show + """ if results is None: results = range(len(self.results)) @@ -134,7 +166,7 @@ self.setcurrent(0) plt.ion() plt.plot(*self.ppe.extracted.plottable()) - a = plt.axis() + plt.axis() for i in results: self.setcurrent(i) plt.ioff() @@ -142,29 +174,38 @@ plt.ion() plt.draw() if step: - raw_input() + input() self.setcurrent(oldcurrent) def run(self, err, savecovs=False): - """err is sequence of uncertainties to run at. + """Run extraction at each uncertainty in a sequence. + + Parameters + ---------- + err : array-like + The sequence of uncertainties to run at. + savecovs : bool + Whether to save the covariance matrix of each final fit. Default is False. + If savecovs is True, return the covariance matrix for each final fit.""" - If savecovs is True, return the covariance matrix for each final fit.""" self.results = [] covs = [] for i, e in enumerate(err): - print "---- Running for uncertainty %s (%i/%i) ----" %(e, i, len(err)) + print("---- Running for uncertainty %s (%i/%i) ----" % (e, i, len(err))) self.ppe.clearcalc() self.ppe.setvars(effective_dy=e) if savecovs: covs.append(self.ppe.extract()) else: self.ppe.extract() - dr = (self.ppe.extracted.r_cluster[-1]-self.ppe.extracted.r_cluster[0])/(len(self.ppe.extracted.r_cluster)-1) + dr = (self.ppe.extracted.r_cluster[-1] - self.ppe.extracted.r_cluster[0]) / ( + len(self.ppe.extracted.r_cluster) - 1 + ) self.results.append([e, self.ppe.extracted.model, self.ppe.extracted.baseline, dr]) for e, r, bl, dr in self.results: - print "---- Results for uncertainty %s ----" %e - print r + print("---- Results for uncertainty %s ----" % e) + print(r) return covs diff --git a/diffpy/srmise/srmiseerrors.py b/src/diffpy/srmise/srmiseerrors.py similarity index 96% rename from diffpy/srmise/srmiseerrors.py rename to src/diffpy/srmise/srmiseerrors.py index f7ef037..62953d5 100644 --- a/diffpy/srmise/srmiseerrors.py +++ b/src/diffpy/srmise/srmiseerrors.py @@ -2,7 +2,8 @@ ############################################################################## # # SrMise by Luke Granlund -# (c) 2014 trustees of the Michigan State University. +# (c) 2014 trustees of the Michigan State University +# (c) 2024 trustees of Columbia University in the City of New York # All rights reserved. # # File coded by: Luke Granlund @@ -28,7 +29,7 @@ """ -### Superclass class for diffpy.srmise.mise +# Superclass for diffpy.srmise.mise class SrMiseError(Exception): """Superclass of all diffpy.srmise exceptions.""" @@ -43,7 +44,7 @@ def __str__(self): return self.info -### SrMiseError subclasses ### +# SrMiseError subclasses ### class SrMiseDataFormatError(SrMiseError): diff --git a/diffpy/srmise/srmiselog.py b/src/diffpy/srmise/srmiselog.py similarity index 81% rename from diffpy/srmise/srmiselog.py rename to src/diffpy/srmise/srmiselog.py index 3ea2c50..d7b15cd 100644 --- a/diffpy/srmise/srmiselog.py +++ b/src/diffpy/srmise/srmiselog.py @@ -2,7 +2,8 @@ ############################################################################## # # SrMise by Luke Granlund -# (c) 2014 trustees of the Michigan State University.
+# (c) 2014 trustees of the Michigan State University +# (c) 2024 trustees of Columbia University in the City of New York # All rights reserved. # # File coded by: Luke Granlund @@ -40,21 +41,22 @@ import logging import os.path import re -import sys from diffpy.srmise.srmiseerrors import SrMiseDataFormatError, SrMiseFileError, SrMiseLogError -### Default settings ### +# Default settings ### defaultformat = "%(message)s" defaultlevel = logging.INFO -LEVELS = {'debug': logging.DEBUG, - 'info': logging.INFO, - 'warning': logging.WARNING, - 'error': logging.ERROR, - 'critical': logging.CRITICAL} +LEVELS = { + "debug": logging.DEBUG, + "info": logging.INFO, + "warning": logging.WARNING, + "error": logging.ERROR, + "critical": logging.CRITICAL, +} -### Set up logging to stdout ### +# Set up logging to stdout ### logger = logging.getLogger("diffpy.srmise") logger.setLevel(defaultlevel) ch = logging.StreamHandler() @@ -65,20 +67,22 @@ logger.addHandler(ch) -### Optional file logger ### +# Optional file logger ### fh = None -### Make updated plots as fitting progresses. ### +# Make updated plots as fitting progresses. ### liveplots = False wait = False + def addfilelog(filename, level=defaultlevel, format=defaultformat): """Log output from diffpy.srmise in specified file. Parameters filename: Name of file to receiving output level: The logging level - format: A string defining format of output messages conforming to logging package.""" + format: A string defining format of output messages conforming to logging package. + """ global fh fh = logging.FileHandler(filename) fh.setLevel(level) @@ -86,6 +90,7 @@ fh.setFormatter(formatter) logger.addHandler(fh) + def setfilelevel(level): """Set level of file logger. @@ -101,6 +106,7 @@ emsg = "File handler does not exist, cannot set its level." raise SrMiseLogError(emsg) + def setlevel(level): """Set level of default (stdout) logger. @@ -112,6 +118,7 @@ if level < logger.getEffectiveLevel(): logger.setLevel(level) + def liveplotting(lp, w=False): """Set whether or not to use live plotting. @@ -136,7 +143,8 @@ raise ValueError(emsg) -### TracePeaks. Primary purpose is to enable creating movies. ### +# TracePeaks. Primary purpose is to enable creating movies.
### + class TracePeaks(object): """Output trace information during peak extraction.""" @@ -191,15 +199,20 @@ def maketrace(self, *args, **kwds): clusters.append(m.slice) for m in args[1:]: mc.replacepeaks(m.model) - return {"mc":mc, "clusters":clusters, "recursion":self.recursion, "counter":self.counter} + return { + "mc": mc, + "clusters": clusters, + "recursion": self.recursion, + "counter": self.counter, + } def writestr(self, trace): """Return string representation of current trace.""" lines = [] lines.append("### Trace") - lines.append("counter=%i" %trace["counter"]) - lines.append("recursion=%i" %trace["recursion"]) - lines.append("clusters=%s" %trace["clusters"]) + lines.append("counter=%i" % trace["counter"]) + lines.append("recursion=%i" % trace["recursion"]) + lines.append("clusters=%s" % trace["clusters"]) lines.append("### ModelCluster") lines.append(trace["mc"].writestr()) @@ -207,8 +220,8 @@ def writestr(self, trace): def write(self, trace): """Write current trace to file.""" - filename = "%s_%i" %(self.filebase, trace["counter"]) - f = open(filename, 'w') + filename = "%s_%i" % (self.filebase, trace["counter"]) + f = open(filename, "w") bytes = self.writestr(trace) f.write(bytes) f.close() @@ -225,14 +238,15 @@ def read(self, filename): "mc" - A ModelCluster instance "recursion" - The recursion level of mc""" try: - return self.readstr(open(filename,'rb').read()) - except SrMiseDataFormatError, err: + return self.readstr(open(filename, "rb").read()) + except SrMiseDataFormatError as err: logger.exception("") basename = os.path.basename(filename) - emsg = ("Could not open '%s' due to unsupported file format " + - "or corrupted data. [%s]") % (basename, err) + emsg = ("Could not open '%s' due to unsupported file format " + "or corrupted data. [%s]") % ( + basename, + err, + ) raise SrMiseFileError(emsg) - return None def readstr(self, datastring): """Read tracer ModelCluster from string. @@ -247,43 +261,49 @@ def readstr(self, datastring): "recursion" - The recursion level of mc""" # find where the ModelCluster section starts - res = re.search(r'^#+ ModelCluster\s*(?:#.*\s+)*', datastring, re.M) + res = re.search(r"^#+ ModelCluster\s*(?:#.*\s+)*", datastring, re.M) if res: - header = datastring[:res.start()] - mc = datastring[res.end():].strip() + header = datastring[: res.start()] + mc = datastring[res.end() :].strip() else: emsg = "Required section 'ModelCluster' not found." raise SrMiseDataFormatError(emsg) # instantiate ModelCluster - if re.match(r'^None$', mc): + if re.match(r"^None$", mc): mc = None else: from diffpy.srmise.modelcluster import ModelCluster + mc = ModelCluster.factory(mc) - res = re.search(r'^clusters=(.*)$', header, re.M) + res = re.search(r"^clusters=(.*)$", header, re.M) if res: clusters = eval(res.groups()[0].strip()) else: emsg = "Required field 'clusters' not found." raise SrMiseDataFormatError(emsg) - res = re.search(r'^recursion=(.*)$', header, re.M) + res = re.search(r"^recursion=(.*)$", header, re.M) if res: - recursion = eval(res.groups()[0].strip()) + eval(res.groups()[0].strip()) else: emsg = "Required field 'recursion' not found." raise SrMiseDataFormatError(emsg) - res = re.search(r'^counter=(.*)$', header, re.M) + res = re.search(r"^counter=(.*)$", header, re.M) if res: - counter = eval(res.groups()[0].strip()) + eval(res.groups()[0].strip()) else: emsg = "Required field 'counter' not found." 
@@ -247,43 +261,49 @@ def readstr(self, datastring):
         "recursion" - The recursion level of mc"""

         # find where the ModelCluster section starts
-        res = re.search(r'^#+ ModelCluster\s*(?:#.*\s+)*', datastring, re.M)
+        res = re.search(r"^#+ ModelCluster\s*(?:#.*\s+)*", datastring, re.M)
         if res:
-            header = datastring[:res.start()]
-            mc = datastring[res.end():].strip()
+            header = datastring[: res.start()]
+            mc = datastring[res.end() :].strip()
         else:
             emsg = "Required section 'ModelCluster' not found."
             raise SrMiseDataFormatError(emsg)

         # instantiate ModelCluster
-        if re.match(r'^None$', mc):
+        if re.match(r"^None$", mc):
             mc = None
         else:
             from diffpy.srmise.modelcluster import ModelCluster
+
             mc = ModelCluster.factory(mc)

-        res = re.search(r'^clusters=(.*)$', header, re.M)
+        res = re.search(r"^clusters=(.*)$", header, re.M)
         if res:
             clusters = eval(res.groups()[0].strip())
         else:
             emsg = "Required field 'clusters' not found."
             raise SrMiseDataFormatError(emsg)

-        res = re.search(r'^recursion=(.*)$', header, re.M)
+        res = re.search(r"^recursion=(.*)$", header, re.M)
         if res:
             recursion = eval(res.groups()[0].strip())
         else:
             emsg = "Required field 'recursion' not found."
             raise SrMiseDataFormatError(emsg)

-        res = re.search(r'^counter=(.*)$', header, re.M)
+        res = re.search(r"^counter=(.*)$", header, re.M)
         if res:
             counter = eval(res.groups()[0].strip())
         else:
             emsg = "Required field 'counter' not found."
             raise SrMiseDataFormatError(emsg)

-        return {"mc":mc, "clusters":clusters, "recursion":self.recursion, "counter":self.counter}
+        return {
+            "mc": mc,
+            "clusters": clusters,
+            "recursion": recursion,
+            "counter": counter,
+        }

     def pushr(self):
         """Enter a layer of recursion, and return new level."""

@@ -315,16 +335,22 @@ def reset_trace(self):

     # filter property
     def setfilter(self, filter):
-        self.__filter = compile(" and ".join(["(%s)" %f for f in filter]), '', 'eval')
-    def getfilter(self): return self.__filter
+        self.__filter = compile(" and ".join(["(%s)" % f for f in filter]), "", "eval")
+
+    def getfilter(self):
+        return self.__filter
+
     filter = property(getfilter, setfilter)

-### End of class TracePeaks
+
+# End of class TracePeaks
+

 def settracer(**kwds):
     global tracer
     tracer = TracePeaks(**kwds)
     return tracer

+
 # Default tracer never emits
 tracer = settracer()
diff --git a/src/diffpy/srmise/tests/__init__.py b/src/diffpy/srmise/tests/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/src/diffpy/srmise/tests/conftest.py b/src/diffpy/srmise/tests/conftest.py
new file mode 100644
index 0000000..e3b6313
--- /dev/null
+++ b/src/diffpy/srmise/tests/conftest.py
@@ -0,0 +1,19 @@
+import json
+from pathlib import Path
+
+import pytest
+
+
+@pytest.fixture
+def user_filesystem(tmp_path):
+    base_dir = Path(tmp_path)
+    home_dir = base_dir / "home_dir"
+    home_dir.mkdir(parents=True, exist_ok=True)
+    cwd_dir = base_dir / "cwd_dir"
+    cwd_dir.mkdir(parents=True, exist_ok=True)
+
+    home_config_data = {"username": "home_username", "email": "home@email.com"}
+    with open(home_dir / "diffpyconfig.json", "w") as f:
+        json.dump(home_config_data, f)
+
+    yield tmp_path
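The user_filesystem fixture above yields pytest's tmp_path after seeding home_dir/diffpyconfig.json inside it (cwd_dir is created empty). A hypothetical test consuming the fixture, included here only to show the intended call pattern::

    import json


    def test_reads_home_config(user_filesystem):
        # user_filesystem is the tmp_path populated by the fixture
        config_file = user_filesystem / "home_dir" / "diffpyconfig.json"
        config = json.loads(config_file.read_text())
        assert config["username"] == "home_username"
        assert config["email"] == "home@email.com"

test_reads_home_config does not exist in this patch; it is a usage illustration only.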
diff --git a/src/diffpy/srmise/tests/debug.py b/src/diffpy/srmise/tests/debug.py
new file mode 100644
index 0000000..313b120
--- /dev/null
+++ b/src/diffpy/srmise/tests/debug.py
@@ -0,0 +1,40 @@
+#!/usr/bin/env python
+##############################################################################
+#
+# (c) 2024 The Trustees of Columbia University in the City of New York.
+# All rights reserved.
+#
+# File coded by: Billinge Group members and community contributors.
+#
+# See GitHub contributions for a more detailed list of contributors.
+# https://github.com/diffpy/diffpy.srmise/graphs/contributors
+#
+# See LICENSE.rst for license information.
+#
+##############################################################################
+
+"""
+Convenience module for debugging the unit tests using
+
+python -m diffpy.srmise.tests.debug
+
+Failures and errors drop into the Python debugger for post-mortem
+inspection.
+"""
+
+
+if __name__ == "__main__":
+    import sys
+
+    import pytest
+
+    # The tests package defines no testsuite() helper, so invoke pytest
+    # directly; --pdb enters the debugger on each failure or error.
+    args = ["--pdb"]
+    if len(sys.argv) > 1:
+        # interpret the first argument as a -k test selection pattern
+        args.extend(["-k", sys.argv[1]])
+    sys.exit(pytest.main(args))
+
+
+# End of file
diff --git a/src/diffpy/srmise/tests/run.py b/src/diffpy/srmise/tests/run.py
new file mode 100644
index 0000000..afe4bbf
--- /dev/null
+++ b/src/diffpy/srmise/tests/run.py
@@ -0,0 +1,34 @@
+#!/usr/bin/env python
+##############################################################################
+#
+# (c) 2024 The Trustees of Columbia University in the City of New York.
+# All rights reserved.
+#
+# File coded by: Billinge Group members and community contributors.
+#
+# See GitHub contributions for a more detailed list of contributors.
+# https://github.com/diffpy/diffpy.srmise/graphs/contributors
+#
+# See LICENSE.rst for license information.
+#
+##############################################################################
+"""Convenience module for executing all unit tests with
+python -m diffpy.srmise.tests.run
+"""
+
+import sys
+
+import pytest
+
+if __name__ == "__main__":
+    # show output results from every test function
+    args = ["-v"]
+    # forward any extra command-line arguments straight to pytest
+    if len(sys.argv) > 1:
+        args.extend(sys.argv[1:])
+    print("pytest arguments: {}".format(args))
+    # call pytest and exit with the return code from pytest
+    exit_res = pytest.main(args)
+    sys.exit(exit_res)
+
+# End of file
diff --git a/src/diffpy/srmise/tests/test_dataclusters.py b/src/diffpy/srmise/tests/test_dataclusters.py
new file mode 100644
index 0000000..c9fa8a7
--- /dev/null
+++ b/src/diffpy/srmise/tests/test_dataclusters.py
@@ -0,0 +1,86 @@
+from copy import copy
+
+import numpy as np
+import pytest
+
+from diffpy.srmise.dataclusters import DataClusters
+
+
+def test___eq__():
+    actual = DataClusters(np.array([1, 2, 3]), np.array([3, 2, 1]), 1)
+    expected = DataClusters(np.array([1, 2, 3]), np.array([3, 2, 1]), 1)
+    assert expected == actual
+    attributes = vars(actual)
+    for attr_key, attr_val in attributes.items():
+        reset = copy(attr_val)
+        assert expected == actual
+        if attr_val is not None:
+            attributes.update({attr_key: attr_val + 1})
+        else:
+            attributes.update({attr_key: 1})
+        # perturbing any single attribute must break equality
+        assert not expected == actual, f"not-equal test failed on {attr_key}"
+        attributes.update({attr_key: reset})
+
+
+@pytest.mark.parametrize(
+    "inputs, expected",
+    [
+        (
+            {
+                "x": np.array([1, 2, 3]),
+                "y": np.array([3, 2, 1]),
+                "res": 4,
+            },
+            {
+                "x": np.array([1, 2, 3]),
+                "y": np.array([3, 2, 1]),
+                "res": 4,
+                "data_order": [2, 1, 0],
+                "clusters": np.array([[0, 0]]),
+                "current_idx": 2,
+                "lastpoint_idx": 0,
+                "INIT": 0,
+                "READY": 1,
+                "CLUSTERING": 2,
+                "DONE": 3,
+                "lastcluster_idx": None,
+                "status": 1,
+            },
+        ),
+    ],
+)
+def test_DataClusters_constructor(inputs, expected):
+    actual = DataClusters(x=inputs["x"], y=inputs["y"], res=inputs["res"])
+    actual_attributes = vars(actual)
+    for attr_key, actual_attr_val in actual_attributes.items():
+        if isinstance(actual_attr_val, np.ndarray):
+            assert np.array_equal(actual_attr_val, expected[attr_key])
+        else:
+            assert actual_attr_val == expected[attr_key]
+
+
+@pytest.mark.parametrize(
+    "inputs, msg",
+    [
+        (
+            {
+                "x": np.array([1]),
+                "y": np.array([3, 2]),
+                "res": 4,
+            },
+            "Sequences x and y must have the same length.",
+        ),
+        (
+            {
+                "x": np.array([1]),
+                "y": np.array([3]),
+                "res": -1,
+            },
+            "Value of resolution parameter is less than zero. Please rerun specifying a non-negative res",
+        ),
+    ],
+)
+def test_DataClusters_constructor_bad(inputs, msg):
+    with pytest.raises(ValueError, match=msg):
+        DataClusters(x=inputs["x"], y=inputs["y"], res=inputs["res"])
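Read outside of pytest, the constructor expectations encoded in test_DataClusters_constructor amount to the short sketch below; every asserted value is copied from the expected dictionary in the parametrized case above::

    import numpy as np

    from diffpy.srmise.dataclusters import DataClusters

    dc = DataClusters(x=np.array([1, 2, 3]), y=np.array([3, 2, 1]), res=4)
    assert dc.status == dc.READY                     # constructor ends in the READY state (1)
    assert np.array_equal(dc.clusters, [[0, 0]])     # a single seed cluster to grow from
    assert np.array_equal(dc.data_order, [2, 1, 0])  # point indices ordered by increasing y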
diff --git a/src/diffpy/srmise/version.py b/src/diffpy/srmise/version.py
new file mode 100644
index 0000000..304027c
--- /dev/null
+++ b/src/diffpy/srmise/version.py
@@ -0,0 +1,26 @@
+#!/usr/bin/env python
+##############################################################################
+#
+# (c) 2024 The Trustees of Columbia University in the City of New York.
+# All rights reserved.
+#
+# File coded by: Billinge Group members and community contributors.
+#
+# See GitHub contributions for a more detailed list of contributors.
+# https://github.com/diffpy/diffpy.srmise/graphs/contributors
+#
+# See LICENSE.rst for license information.
+#
+##############################################################################
+
+"""Definition of __version__."""
+
+# We do not use the other three variables, but they can be added back if needed.
+# __all__ = ["__date__", "__git_commit__", "__timestamp__", "__version__"]
+
+# obtain version information
+from importlib.metadata import version
+
+__version__ = version("diffpy.srmise")
+
+# End of file
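Because version.py now defers entirely to installed package metadata, __version__ resolves only after the package has been installed (for development checkouts, e.g. with pip install -e .). A quick check::

    from diffpy.srmise.version import __version__

    # prints the installed version string; importlib.metadata raises
    # PackageNotFoundError when diffpy.srmise is not installed
    print(__version__)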