diffpy
diff --git a/‎.gitignore
Lines changed: 23 additions & 0 deletions b/‎.gitignore
Lines changed: 23 additions & 0 deletions
diff --git a/‎.travis.yml
Lines changed: 36 additions & 0 deletions b/‎.travis.yml
Lines changed: 36 additions & 0 deletions
diff --git a/‎AUTHORS.txt
Lines changed: 2 additions & 0 deletions b/‎AUTHORS.txt
Lines changed: 2 additions & 0 deletions
diff --git a/‎LICENSE.txt
Lines changed: 33 additions & 0 deletions b/‎LICENSE.txt
Lines changed: 33 additions & 0 deletions
diff --git a/‎README.md
Lines changed: 84 additions & 0 deletions b/‎README.md
Lines changed: 84 additions & 0 deletions
diff --git a/‎diffpy/__init__.py
Lines changed: 23 additions & 0 deletions b/‎diffpy/__init__.py
Lines changed: 23 additions & 0 deletions
diff --git a/‎diffpy/nmf_mapping/__init__.py
Lines changed: 26 additions & 0 deletions b/‎diffpy/nmf_mapping/__init__.py
Lines changed: 26 additions & 0 deletions
diff --git a/‎diffpy/nmf_mapping/nmf_mapping/main.py
Lines changed: 155 additions & 0 deletions b/‎diffpy/nmf_mapping/nmf_mapping/main.py
Lines changed: 155 additions & 0 deletions
@@ -0,0 +1,23 @@
+cache-directory/*
+*.pyc
+lib/*
+.pytest_cache/*
+
+# Rever
+rever/
+
+instance/*.json
+download/*
+.idea/
+
+
+*.egg-info
+*.egg-info/
+bin
+develop-eggs
+dist
+lib
+lib64
+eggs
+parts
+.installed.cfg
@@ -0,0 +1,36 @@
+sudo: False
+
+language: python
+python:
+  - "3.7"
+cache:
+  directories:
+    - $HOME/.cache/pip
+
+matrix:
+  include:
+    - python: 3.7
+
+
+install:
+  # Install conda
+  - wget https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh -O miniconda.sh
+  - bash miniconda.sh -b -p $HOME/miniconda
+  - export PATH="$HOME/miniconda/bin:$PATH"
+  - conda config --set always_yes yes --set changeps1 no
+  - conda config --add channels conda-forge
+  - conda config --add channels diffpy
+  - conda update conda
+    # Install dependencies
+  - conda create -n test --file requirements/run.txt python=3.7
+  - source activate test
+  - python setup.py install
+
+script:
+  - set -e
+  - conda install --file requirements/test.txt
+  - python -m pytest
+
+notifications:
+  email: false
+
@@ -0,0 +1,2 @@
+Simon J. L. Billinge <[email protected]>
+Zachary A. Thatcher <[email protected]>
@@ -0,0 +1,33 @@
+This program is part of the DiffPy open-source project at Columbia
+University and is available subject to the conditions and terms laid out below.
+
+Copyright © 2009-2019, Trustees of Columbia University in the City of New York,
+all rights reserved.
+
+For more information please visit the diffpy web-page at http://diffpy.org or
+email Prof. Simon Billinge at [email protected].
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+  * Redistributions of source code must retain the above copyright notice, this
+    list of conditions and the following disclaimer.
+
+  * Redistributions in binary form must reproduce the above copyright notice,
+    this list of conditions and the following disclaimer in the documentation
+    and/or other materials provided with the distribution.
+
+  * Neither the names of COLUMBIA UNIVERSITY, MICHIGAN STATE UNIVERSITY nor the
+    names of their contributors may be used to endorse or promote products
+    derived from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
@@ -0,0 +1,84 @@
+NMF Mapping for PDF or XRD Files
+---------
+This package takes a directory containing diffraction files in .gr (or .xy/.xye) format and performs an NMF
+decomposition of the data, with the goal of determining the number of structural phases present and, if the data
+provided comes from a time series, when these phases are present. Any files in the directory that are not in .gr
+(or .xy/.xye) format, as well as any .dat files, will be skipped and ignored in the calculation.
+
+Use
+---------
+This package is the backend logic for pdfitc.org/NMF. Please consider utilizing pdfitc.org/NMF prior to this tool, if
+possible. If your NMF analysis requires some feature from this CLI that isn't present on the website, please let us know
+and we will consider adding the feature to the pdfitc.org interface.
+
+Installation
+--------
+- Install requirements from run.txt via "conda (or pip) install --file (or -r) 'requirements/run.txt'"
+- Install using "pip install -e ." in a python 3 environment
+
+Argparse
+--------
+Input:
+- directory: path to the directory containing the diffraction files that are to be analyzed
+    - format: string (filepath)
+        - eg: '/Users/zthatcher/Desktop/Data/nmf_mapping/time_data/' or . for cwd
+
+- save-files (optional): boolean as to whether or not you would like to save the dataframes, plots, and 
+components (note: pdf data saves as .cgr and xrd data saves as .xy)
+    - format: boolean
+        - eg: --save-files False
+    - default: True
+    
+- threshold (optional and mut-exc to other thresholds): a threshold for the number of structural phases graphed (NMF components returned)
+    - format as: integer
+        - eg: --threshold 2
+    - default: 10
+    
+- improve-thresh (optional and mut-exc to other thresholds): a threshold (between 0 and 1) for the relative improvement ratio necessary to
+  add an additional component. Default is 0.001. 0.1 Recommended for real data.
+    - format: float
+        - eg: --improve-thresh 0.1
+    - default = 0.001
+    
+- pca-thresh (optional and mut-exc to other thresholds): explained variance threshold for PCA component counting cutoff
+    - format: float
+        - eg: --pca-thresh 0.95
+    - default = None
+    
+- n-iter (optional): total number of iterations to run NMF algo. Defaults to 1000. 10000 typical to publish.
+    - format: int
+        - eg: --n-iter 10000
+    - default: 1000
+
+- xrange (optional): the active x-range(s) over which to run the NMF analysis (must lie between the shortest and
+longest range in the set of files)
+    - format: one or more pairs of integers, each pair giving the lower bound and the upper bound separated by a
+    comma; multiple pairs are separated by spaces
+        -  eg: --xrange 5,10 12,15
+    - default: entire range
+
+- xrd (optional): set this option if the directory contains xy or xye files rather than gr.
+    - format: boolean
+        - eg: --xrd True
+    - default: False
+
+- x_units (required if xrd): set this as either twotheta or q if working with xrd data.
+    - format: enum[str]
+        - eg: --x_units twotheta
+    - default: None (since --xrd defaults to False)
+
+- show (optional): whether or not you would like to display the plots
+    - format: boolean
+        - eg: --show False
+    - default: True
+    
+Returns:
+- Figure One: PDF or XRD pattern of structural phase components contributing to the NMF reconstruction
+- Figure Two: Weights of the phase components plotted in Figure One
+- Figure Three: Reconstruction error as a function of components
+- (Optional) Figure Four: Explained Variance plot as a function of components for PCA thresholding
+
+Example:
+
+nmf_mapping . --threshold 3 --xrange 5,10 --show True
+
@@ -0,0 +1,23 @@
+#!/usr/bin/env python
+##############################################################################
+#
+# diffpy            by DANSE Diffraction group
+#                   Simon J. L. Billinge
+#                   (c) 2008 Trustees of the Columbia University
+#                   in the City of New York.  All rights reserved.
+#
+# File coded by:    Pavol Juhas
+#
+# See AUTHORS.txt for a list of people who contributed.
+# See LICENSE.txt for license information.
+#
+##############################################################################
+
+"""nmf_mapping - tools for performing NMF on PDF and XRD data.
+"""
+
+
+__import__('pkg_resources').declare_namespace(__name__)
+
+
+# End of file
@@ -0,0 +1,26 @@
+#!/usr/bin/env python
+##############################################################################
+#
+# nmf_mapping      by DANSE Diffraction group
+#                   Simon J. L. Billinge
+#                   (c) 2006 trustees of the Michigan State University.
+#                   All rights reserved.
+#
+# File coded by:    Chris Farrow
+#
+# See AUTHORS.txt for a list of people who contributed.
+# See LICENSE.txt for license information.
+#
+##############################################################################
+
+"""Tools for performing NMF decomposition on PDF and XRD data.
+"""
+
+# revision keyword placeholder (presumably expanded by the version-control
+# system's keyword substitution -- TODO confirm it is still used)
+__id__ = "$Id$"
+
+# obtain version information
+__version__ = '0.0.1'
+
+# top-level import
+# exposes the implementation module under the short alias `nmf`, which
+# main.py picks up via `from diffpy.nmf_mapping import nmf`
+from diffpy.nmf_mapping.nmf_mapping import nmf_mapping_code as nmf
+# End of file
@@ -0,0 +1,155 @@
+
+import os
+import sys
+from argparse import ArgumentParser, RawTextHelpFormatter, Namespace
+import time
+from datetime import datetime
+
+from diffpy.nmf_mapping import nmf
+import numpy as np
+
+
+def boolean_string(s):
+    try:
+        if s.lower() not in {'false', 'true'}:
+            raise ValueError('Not a valid boolean string')
+    except AttributeError:
+        raise ValueError('Not a valid boolean string')
+    return s.lower() == 'true'
+
+
+def main(args=None):
+    """
+    Parses directory argument supplied by user and conducts NMF decomposition
+    analysis (computes NMF decomposition and shows the weights over time).
+    """
+
+    _BANNER = """
+    This is a package which takes a directory of 1D diffraction files 
+    (xrd or pdf) and returns json files containing the decomposed components, 
+    the phase fraction of these components from file to file, 
+    as well as the reconstruction error as a fxn of component
+    """
+
+    parser = ArgumentParser(prog='nmf_mapping',
+                            description=_BANNER, formatter_class=RawTextHelpFormatter)
+
+    def tup(s):
+        try:
+            l, h = map(int, s.split(','))
+            return l,h
+        except:
+            raise TypeError('r range must be low, high')
+
+    # args
+    parser.add_argument("directory", default=None, type=str,
+                        help="a directory of PDFs to calculate NMF decomposition")
+    group = parser.add_mutually_exclusive_group()
+    parser.add_argument("--save-files", default=True, type=boolean_string,
+                        help='whether to save the component, graph, and json files in the execution directory\n'
+                             'default: True\n'
+                             'e.g. --save-files False')
+    group.add_argument("--threshold", default=None, type=int,
+                        help="a threshold for the number of structural phases graphed (NMF components returned)\n"
+                             "e.g. --threshold 3")
+    group.add_argument("--improve-thresh", default=None, type=float,
+                        help="a threshold (between 0 and 1) for the relative improvement ratio necessary to add an"
+                             " additional component. Default is 0.001. 0.1 Recommended for real data.\n"
+                             "e.g. --improve-thresh 0.1")
+    group.add_argument("--pca-thresh", default=None, type=float,
+                       help="a threshold (between 0 and 1) for the explained variance of PCA to determine the \n"
+                            "number of components for NMF. e.g. --pca-thresh 0.95")
+    parser.add_argument("--n-iter", default=None, type=int,
+                        help="total number of iterations to run NMF algo. Defaults to 1000. 10000 typical to publish.")
+    parser.add_argument("--xrd", default=False, type=boolean_string,
+                        help="whether to look for .xy files rather than .gr files\n"
+                             "default: False\n"
+                             "e.g. --xrd True")
+    parser.add_argument("--x_units", default=None, type=str, choices=["twotheta", "q"], required='--xrd' in sys.argv,
+                        help="x axis units for XRD data\n"
+                             "default: None\n"
+                             "e.g. --x_units twotheta")
+    parser.add_argument("--xrange", default=None, type=tup, nargs='*',
+                        help="the x-range over which to calculate NMF, can be multiple ranges (e.g. --xrange 5,10 12,15)")
+    parser.add_argument("--show", default=True, type=boolean_string,
+                        help='whether to show the plot')
+    args0 = Namespace()
+    args1, _ = parser.parse_known_args(args, namespace=args0)
+
+    input_list, data_list = nmf.load_data(args1.directory, args1.xrd)
+    if args1.pca_thresh:
+        df_components, df_component_weight_timeseries, df_reconstruction_error, df_explained_var_ratio = \
+            nmf.NMF_decomposition(input_list, args1.xrange, args1.threshold, additional_comp=False,
+                                                improve_thresh=args1.improve_thresh, n_iter=args1.n_iter,
+                                                pca_thresh=args1.pca_thresh)
+    else:
+        df_components, df_component_weight_timeseries, df_reconstruction_error = \
+            nmf.NMF_decomposition(input_list, args1.xrange, args1.threshold, additional_comp=False,
+                                                improve_thresh=args1.improve_thresh, n_iter=args1.n_iter)
+
+    print(f'Number of components: {len(df_components.columns)}')
+
+    fig1 = nmf.component_plot(df_components, args1.xrd, args1.x_units, args1.show)
+    fig2 = nmf.component_ratio_plot(df_component_weight_timeseries, args1.show)
+    fig3 = nmf.reconstruction_error_plot(df_reconstruction_error, args1.show)
+    if args1.pca_thresh:
+        fig4 = nmf.explained_variance_plot(df_explained_var_ratio, args1.show)
+
+    if args1.save_files:
+        if not os.path.exists(os.path.join(os.getcwd(), 'nmf_result')):
+            os.mkdir(os.path.join(os.getcwd(), 'nmf_result'))
+        output_fn = datetime.fromtimestamp(time.time()).strftime(
+            '%Y%m%d%H%M%S%f')
+        df_components.to_json(os.path.join(os.getcwd(), 'nmf_result', 'x_index_vs_y_col_components.json'))
+        df_component_weight_timeseries.to_json(os.path.join(os.getcwd(), 'nmf_result', 'component_index_vs_pratio_col.json'))
+        df_component_weight_timeseries.to_csv(os.path.join(os.getcwd(), 'nmf_result', output_fn + 'component_row_pratio_col.txt'), header=None, index=False, sep=' ', mode='a')
+        df_reconstruction_error.to_json(os.path.join(os.getcwd(), 'nmf_result', 'component_index_vs_RE_value.json'))
+        plot_file1 = os.path.join(os.getcwd(), 'nmf_result', output_fn + "comp_plot.png")
+        plot_file2 = os.path.join(os.getcwd(), 'nmf_result', output_fn + "ratio_plot.png")
+        plot_file3 = os.path.join(os.getcwd(), 'nmf_result', output_fn + "loss_plot.png")
+        if args1.pca_thresh:
+            plot_file7 = os.path.join(os.getcwd(), 'nmf_result', output_fn + "pca_var_plot.png")
+        plot_file4 = os.path.splitext(plot_file1)[0] + '.pdf'
+        plot_file5 = os.path.splitext(plot_file2)[0] + '.pdf'
+        plot_file6 = os.path.splitext(plot_file3)[0] + '.pdf'
+        if args1.pca_thresh:
+            plot_file8 = os.path.splitext(plot_file7)[0] + '.pdf'
+        txt_file = os.path.join(os.getcwd(), 'nmf_result', output_fn + '_meta' + '.txt')
+        with open(txt_file, 'w+') as fi:
+            fi.write('NMF Analysis\n\n')
+            fi.write(f'{len(df_component_weight_timeseries.columns)} files uploaded for analysis.\n\n')
+            fi.write(f'The selected active r ranges are:  {args1.xrange} \n\n')
+            fi.write('Thesholding:\n')
+            fi.write(f'\tThe input component threshold was: {args1.threshold}\n')
+            fi.write(f'\tThe input improvement threshold was: {args1.improve_thresh}\n')
+            fi.write(f'\tThe input # of iterations to run was: {args1.n_iter}\n')
+            fi.write(f'\tWas PCA thresholding used?: {args1.pca_thresh}\n')
+            fi.write(f'{len(df_components.columns)} components were extracted')
+
+        fig1.savefig(plot_file1)
+        fig2.savefig(plot_file2)
+        fig3.savefig(plot_file3)
+        if args1.pca_thresh:
+            fig4.savefig(plot_file7)
+        fig1.savefig(plot_file4)
+        fig2.savefig(plot_file5)
+        fig3.savefig(plot_file6)
+        if args1.pca_thresh:
+            fig4.savefig(plot_file8)
+        columns = df_components.columns
+        for i, col in enumerate(columns):
+            data = np.column_stack([df_components.index.to_list(), df_components[col].to_list()])
+
+            if args1.xrd:
+                np.savetxt(os.path.join(os.getcwd(), 'nmf_result', output_fn + f'_comp{i}' + '.xy'), data,
+                           header=f"NMF Generated XRD\nSource = nmfMapping\n"
+                                  f"Date = {output_fn}\n{args1.x_units} Intensity\n", fmt='%s',
+                           comments="' ")
+            else:
+                np.savetxt(os.path.join(os.getcwd(), 'nmf_result', output_fn + f'_comp{i}' + '.cgr'), data,
+                           header=f"NMF Generated PDF\nSource: nmfMapping\n"
+                                  f"Date: {output_fn}\nr g", fmt='%s')
+
+
+# Run the command-line workflow when this file is executed as a script.
+if __name__ == "__main__":
+    main()
Original file line number	Diff line number	Diff line change
`@@ -0,0 +1,2 @@`
	`1`	`+Simon J. L. Billinge <[email protected]>`
	`2`	`+Zachary A. Thatcher<[email protected]>`