|
| 1 | + |
| 2 | +import os |
| 3 | +import sys |
| 4 | +from argparse import ArgumentParser, RawTextHelpFormatter, Namespace |
| 5 | +import time |
| 6 | +from datetime import datetime |
| 7 | + |
| 8 | +from diffpy.nmf_mapping import nmf |
| 9 | +import numpy as np |
| 10 | + |
| 11 | + |
| 12 | +def boolean_string(s): |
| 13 | + try: |
| 14 | + if s.lower() not in {'false', 'true'}: |
| 15 | + raise ValueError('Not a valid boolean string') |
| 16 | + except AttributeError: |
| 17 | + raise ValueError('Not a valid boolean string') |
| 18 | + return s.lower() == 'true' |
| 19 | + |
| 20 | + |
| 21 | +def main(args=None): |
| 22 | + """ |
| 23 | + Parses directory argument supplied by user and conducts NMF decomposition |
| 24 | + analysis (computes NMF decomposition and shows the weights over time). |
| 25 | + """ |
| 26 | + |
| 27 | + _BANNER = """ |
| 28 | + This is a package which takes a directory of 1D diffraction files |
| 29 | + (xrd or pdf) and returns json files containing the decomposed components, |
| 30 | + the phase fraction of these components from file to file, |
| 31 | + as well as the reconstruction error as a fxn of component |
| 32 | + """ |
| 33 | + |
| 34 | + parser = ArgumentParser(prog='nmf_mapping', |
| 35 | + description=_BANNER, formatter_class=RawTextHelpFormatter) |
| 36 | + |
| 37 | + def tup(s): |
| 38 | + try: |
| 39 | + l, h = map(int, s.split(',')) |
| 40 | + return l,h |
| 41 | + except: |
| 42 | + raise TypeError('r range must be low, high') |
| 43 | + |
| 44 | + # args |
| 45 | + parser.add_argument("directory", default=None, type=str, |
| 46 | + help="a directory of PDFs to calculate NMF decomposition") |
| 47 | + group = parser.add_mutually_exclusive_group() |
| 48 | + parser.add_argument("--save-files", default=True, type=boolean_string, |
| 49 | + help='whether to save the component, graph, and json files in the execution directory\n' |
| 50 | + 'default: True\n' |
| 51 | + 'e.g. --save-files False') |
| 52 | + group.add_argument("--threshold", default=None, type=int, |
| 53 | + help="a threshold for the number of structural phases graphed (NMF components returned)\n" |
| 54 | + "e.g. --threshold 3") |
| 55 | + group.add_argument("--improve-thresh", default=None, type=float, |
| 56 | + help="a threshold (between 0 and 1) for the relative improvement ratio necessary to add an" |
| 57 | + " additional component. Default is 0.001. 0.1 Recommended for real data.\n" |
| 58 | + "e.g. --improve-thresh 0.1") |
| 59 | + group.add_argument("--pca-thresh", default=None, type=float, |
| 60 | + help="a threshold (between 0 and 1) for the explained variance of PCA to determine the \n" |
| 61 | + "number of components for NMF. e.g. --pca-thresh 0.95") |
| 62 | + parser.add_argument("--n-iter", default=None, type=int, |
| 63 | + help="total number of iterations to run NMF algo. Defaults to 1000. 10000 typical to publish.") |
| 64 | + parser.add_argument("--xrd", default=False, type=boolean_string, |
| 65 | + help="whether to look for .xy files rather than .gr files\n" |
| 66 | + "default: False\n" |
| 67 | + "e.g. --xrd True") |
| 68 | + parser.add_argument("--x_units", default=None, type=str, choices=["twotheta", "q"], required='--xrd' in sys.argv, |
| 69 | + help="x axis units for XRD data\n" |
| 70 | + "default: None\n" |
| 71 | + "e.g. --x_units twotheta") |
| 72 | + parser.add_argument("--xrange", default=None, type=tup, nargs='*', |
| 73 | + help="the x-range over which to calculate NMF, can be multiple ranges (e.g. --xrange 5,10 12,15)") |
| 74 | + parser.add_argument("--show", default=True, type=boolean_string, |
| 75 | + help='whether to show the plot') |
| 76 | + args0 = Namespace() |
| 77 | + args1, _ = parser.parse_known_args(args, namespace=args0) |
| 78 | + |
| 79 | + input_list, data_list = nmf.load_data(args1.directory, args1.xrd) |
| 80 | + if args1.pca_thresh: |
| 81 | + df_components, df_component_weight_timeseries, df_reconstruction_error, df_explained_var_ratio = \ |
| 82 | + nmf.NMF_decomposition(input_list, args1.xrange, args1.threshold, additional_comp=False, |
| 83 | + improve_thresh=args1.improve_thresh, n_iter=args1.n_iter, |
| 84 | + pca_thresh=args1.pca_thresh) |
| 85 | + else: |
| 86 | + df_components, df_component_weight_timeseries, df_reconstruction_error = \ |
| 87 | + nmf.NMF_decomposition(input_list, args1.xrange, args1.threshold, additional_comp=False, |
| 88 | + improve_thresh=args1.improve_thresh, n_iter=args1.n_iter) |
| 89 | + |
| 90 | + print(f'Number of components: {len(df_components.columns)}') |
| 91 | + |
| 92 | + fig1 = nmf.component_plot(df_components, args1.xrd, args1.x_units, args1.show) |
| 93 | + fig2 = nmf.component_ratio_plot(df_component_weight_timeseries, args1.show) |
| 94 | + fig3 = nmf.reconstruction_error_plot(df_reconstruction_error, args1.show) |
| 95 | + if args1.pca_thresh: |
| 96 | + fig4 = nmf.explained_variance_plot(df_explained_var_ratio, args1.show) |
| 97 | + |
| 98 | + if args1.save_files: |
| 99 | + if not os.path.exists(os.path.join(os.getcwd(), 'nmf_result')): |
| 100 | + os.mkdir(os.path.join(os.getcwd(), 'nmf_result')) |
| 101 | + output_fn = datetime.fromtimestamp(time.time()).strftime( |
| 102 | + '%Y%m%d%H%M%S%f') |
| 103 | + df_components.to_json(os.path.join(os.getcwd(), 'nmf_result', 'x_index_vs_y_col_components.json')) |
| 104 | + df_component_weight_timeseries.to_json(os.path.join(os.getcwd(), 'nmf_result', 'component_index_vs_pratio_col.json')) |
| 105 | + df_component_weight_timeseries.to_csv(os.path.join(os.getcwd(), 'nmf_result', output_fn + 'component_row_pratio_col.txt'), header=None, index=False, sep=' ', mode='a') |
| 106 | + df_reconstruction_error.to_json(os.path.join(os.getcwd(), 'nmf_result', 'component_index_vs_RE_value.json')) |
| 107 | + plot_file1 = os.path.join(os.getcwd(), 'nmf_result', output_fn + "comp_plot.png") |
| 108 | + plot_file2 = os.path.join(os.getcwd(), 'nmf_result', output_fn + "ratio_plot.png") |
| 109 | + plot_file3 = os.path.join(os.getcwd(), 'nmf_result', output_fn + "loss_plot.png") |
| 110 | + if args1.pca_thresh: |
| 111 | + plot_file7 = os.path.join(os.getcwd(), 'nmf_result', output_fn + "pca_var_plot.png") |
| 112 | + plot_file4 = os.path.splitext(plot_file1)[0] + '.pdf' |
| 113 | + plot_file5 = os.path.splitext(plot_file2)[0] + '.pdf' |
| 114 | + plot_file6 = os.path.splitext(plot_file3)[0] + '.pdf' |
| 115 | + if args1.pca_thresh: |
| 116 | + plot_file8 = os.path.splitext(plot_file7)[0] + '.pdf' |
| 117 | + txt_file = os.path.join(os.getcwd(), 'nmf_result', output_fn + '_meta' + '.txt') |
| 118 | + with open(txt_file, 'w+') as fi: |
| 119 | + fi.write('NMF Analysis\n\n') |
| 120 | + fi.write(f'{len(df_component_weight_timeseries.columns)} files uploaded for analysis.\n\n') |
| 121 | + fi.write(f'The selected active r ranges are: {args1.xrange} \n\n') |
| 122 | + fi.write('Thesholding:\n') |
| 123 | + fi.write(f'\tThe input component threshold was: {args1.threshold}\n') |
| 124 | + fi.write(f'\tThe input improvement threshold was: {args1.improve_thresh}\n') |
| 125 | + fi.write(f'\tThe input # of iterations to run was: {args1.n_iter}\n') |
| 126 | + fi.write(f'\tWas PCA thresholding used?: {args1.pca_thresh}\n') |
| 127 | + fi.write(f'{len(df_components.columns)} components were extracted') |
| 128 | + |
| 129 | + fig1.savefig(plot_file1) |
| 130 | + fig2.savefig(plot_file2) |
| 131 | + fig3.savefig(plot_file3) |
| 132 | + if args1.pca_thresh: |
| 133 | + fig4.savefig(plot_file7) |
| 134 | + fig1.savefig(plot_file4) |
| 135 | + fig2.savefig(plot_file5) |
| 136 | + fig3.savefig(plot_file6) |
| 137 | + if args1.pca_thresh: |
| 138 | + fig4.savefig(plot_file8) |
| 139 | + columns = df_components.columns |
| 140 | + for i, col in enumerate(columns): |
| 141 | + data = np.column_stack([df_components.index.to_list(), df_components[col].to_list()]) |
| 142 | + |
| 143 | + if args1.xrd: |
| 144 | + np.savetxt(os.path.join(os.getcwd(), 'nmf_result', output_fn + f'_comp{i}' + '.xy'), data, |
| 145 | + header=f"NMF Generated XRD\nSource = nmfMapping\n" |
| 146 | + f"Date = {output_fn}\n{args1.x_units} Intensity\n", fmt='%s', |
| 147 | + comments="' ") |
| 148 | + else: |
| 149 | + np.savetxt(os.path.join(os.getcwd(), 'nmf_result', output_fn + f'_comp{i}' + '.cgr'), data, |
| 150 | + header=f"NMF Generated PDF\nSource: nmfMapping\n" |
| 151 | + f"Date: {output_fn}\nr g", fmt='%s') |
| 152 | + |
| 153 | + |
| 154 | +if __name__ == "__main__": |
| 155 | + main() |
0 commit comments