forked from diffpy/diffpy.srmise
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathquery_results.py
222 lines (195 loc) · 9.44 KB
/
query_results.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
#!/usr/bin/env python
##############################################################################
#
# diffpy.srmise by Luke Granlund
# (c) 2015 trustees of the Michigan State University.
# All rights reserved.
#
# File coded by: Luke Granlund
#
# See LICENSE.txt for license information.
#
##############################################################################
"""Example of extracting multiple peaks and accessing results programatically.
This example shows how to extract a range of peaks from a simple crystalline
PDF using a crystalline baseline obtained from an existing trial. It shows
how to access the value and uncertainty of peak parameters, including
transforming between different peak parameterizations. Finally, it shows how
to evaluate the model on an arbitrary grid.
The peaks extracted by this script are equivalent to those obtained running
srmise data/Ag_nyquist_qmax30.gr --range 2. 10. \
--bsrmise output/extract_single_peak.srmise \
--save output/query_results.srmise \
--pwa output/query_results.pwa --plot
at the command line.
"""
import matplotlib.pyplot as plt
import numpy as np
from diffpy.srmise.modelcluster import ModelCovariance
from diffpy.srmise.pdfpeakextraction import PDFPeakExtraction
def run(plot=True):
# Initialize peak extraction
# Create peak extraction object
ppe = PDFPeakExtraction()
# Load the PDF from a file
ppe.loadpdf("data/Ag_nyquist_qmax30.gr")
# Obtain baseline from a saved diffpy.srmise trial. This is not the
# initial baseline estimate from the previous example, but the baseline
# after both it and the extracted peaks have been fit to the data.
ppebl = PDFPeakExtraction()
ppebl.read("output/extract_single_peak.srmise")
baseline = ppebl.extracted.baseline
# Set up extraction parameters.
# Peaks are extracted between 2 and 10 angstroms, using the baseline
# from the isolated peak example.
kwds = {}
kwds["rng"] = [2.0, 10.0]
kwds["baseline"] = baseline
# Apply peak extraction parameters.
ppe.setvars(**kwds)
# Perform peak extraction, and retain object containing a copy of the
# model and the full covariance matrix.
cov = ppe.extract()
print("\n======= Accessing SrMise Results ========")
# Accessing results of extraction
#
# Model parameters are organized using a nested structure, with a list
# of peaks each of which is a list of parameters, similar to the the
# following schematic.
# Peak
# Position
# Width
# Area
# Peak
# Position
# Width
# Area*
# ...
# Baseline
# Slope
# Intercept
# By convention, the baseline is the final "peak." The ModelCovariance
# object returned by extract() can return information about any peak by
# using the appropriate tuple of indices (i,j). That is, (i,j) denotes
# the jth parameter of the ith peak. For example, the starred parameter
# above is the area (index = 2) of the next nearest neighbor (index = 1)
# peak. Thus, this parameter can be referenced as (1,2). Several examples
# are presented below.
print("\n------ Parameter values and uncertainties ------")
# ModelCovariance.get() returns a (value, uncertainty) tuple for a given
# parameter. These are the results for the nearest-neighbor peak.
p0 = cov.get((0, 0))
w0 = cov.get((0, 1))
a0 = cov.get((0, 2))
print("Nearest-neighbor peak: ")
print(" position = %f +/- %f" % p0)
print(" width = %f +/- %f" % w0)
print(" area = %f +/- %f" % a0)
print(" Covariance(width, area) = ", cov.getcovariance((0, 1), (0, 2)))
# Baseline parameters. By convention, baseline is final element in cov.
(slope, intercept) = cov.model[-1]
print("\nThe linear baseline B(r)=%f*r + %f" % tuple(par for par in cov.model[-1]))
print("\n ------ Uncertainties from a Saved File --------")
# A .srmise file does not save the full covariance matrix, so it must be
# recalculated when loading from these files. For example, here is the
# nearest-neighbor peak in the file which we used to define the initial
# baseline.
cov2 = ModelCovariance()
ppebl.extracted.fit(fitbaseline=True, cov=cov2, cov_format="default_output")
p0_saved = cov2.get((0, 0))
w0_saved = cov2.get((0, 1))
a0_saved = cov2.get((0, 2))
print("Nearest-neighbor peak:")
print(" position = %f +/- %f" % p0_saved)
print(" width == %f +/- %f" % w0_saved)
print(" area = = %f +/- %f" % a0_saved)
print(" Covariance(width, area) = ", cov2.getcovariance((0, 1), (0, 2)))
print("\n ---------- Alternate Parameterizations ---------")
# Different Parameterizations
# Peaks and baselines may have equivalent parameterizations that are useful
# in different situations. For example, the types defined by the
# GaussianOverR peak function are:
# "internal" - Used in diffpy.srmise calculations, explicitly enforces a
# maximum peak width
# "pwa" - The position, width (full-width at half-maximum), area.
# "mu_sigma_area" - The position, width (the distribution standard
# deviation sigma), area.
# "default_output" - Defines default format to use in most user-facing
# scenarios. Maps to the "pwa" parameterization.
# "default_input" - Defines default format to use when specifying peak
# parameters. Maps to the "internal" parameterization.
# All diffpy.srmise peak and baseline functions are required to have the
# "internal", "default_output", and "default_input" formats. In many
# cases, such as polynomial baselines, all of these are equivalent.
#
# Suppose you want to know peak widths in terms of the standard deviation
# sigma of the Gaussian distribution. It is then appropriate to convert
# all peaks to the "mu_sigma_area" format. Valid options for the "parts"
# keyword are "peaks", "baseline", or a sequence of indices (e.g. [1,2,3]
# would transform the second, third, and fourth peaks). If the keyword
# is omitted, the transformation is attempted for all parts of the fit.
cov.transform(in_format="pwa", out_format="mu_sigma_area", parts="peaks")
print("Width (sigma) of nearest-neighbor peak: %f +/- %f" % cov.get((0, 1)))
print("\n ------------ Highly Correlated Parameters ------------")
# Highly-correlated parameters can indicate difficulties constraining the
# fit. This function lists all pairs of parameters with an absolute value
# of correlation which exceeds a given threshold.
print("|Correlation| > 0.9:")
print("par1 par2 corr(par1, par2)")
print("\n".join(str(c) for c in cov.correlationwarning(0.9)))
print("\n-------- Estimate coordination shell occupancy ---------")
# Estimate the scale factor and its uncertainty from first peak's intensity.
# G_normalized = scale * G_observed
# dscale = scale * dG_observed/G_observed
scale = 12.0 / a0[0]
dscale = scale * a0[1] / a0[0]
print("Estimate scale factor assuming nearest-neighbor intensity = 12")
print("Scale factor is %f +/- %f" % (scale, dscale))
# Reference for number of atoms in coordination shells for FCC.
# http://chem-faculty.lsu.edu/watkins/MERLOT/cubic_neighbors/cubic_near_neighbors.html
ideal_intensity = [12, 6, 24, 12, 24, 8, 48, 6, 36, 24, 24, 24]
# Calculated the scaled intensities and uncertainties.
intensity = []
for i in range(0, len(cov.model) - 1):
(area, darea) = cov.get((i, 2))
area *= scale
darea = area * np.sqrt((dscale / scale) ** 2 + (darea / area) ** 2)
intensity.append((ideal_intensity[i], area, darea))
print("\nIntensity")
print("Ideal: Estimated")
for i in intensity:
print("%i: %f +/- %f" % i)
print("\nTotal intensity")
# It is possible to iterate over peaks directly without using indices.
# In addition, peak parameters can be accessed using string keys. For the
# Gaussian over r all of "position", "width", and "area" are valid.
total_observed_intensity = 0
total_ideal_intensity = 0
for peak, ii in zip(cov.model[:-1], ideal_intensity):
total_observed_intensity += scale * peak["area"]
total_ideal_intensity += ii
print("Ideal: Observed (using estimated scale factor)")
print("%i: %f" % (total_ideal_intensity, total_observed_intensity))
# Save output
ppe.write("output/query_results.srmise")
ppe.writepwa("output/query_results.pwa")
# Evaluating a model.
# Although the ModelCovariance object is useful, the model used for fitting
# can be directly accessed through PDFPeakExtraction as well, albeit
# without uncertainties. This is particularly helpful when evaluating a
# model since the parameters stay in the "internal" format used for
# calculations. For example, here we plot the data and every second peak
# on an arbitrary grid. Unlike with ModelCovariance, the baseline and
# peaks are kept separate.
if plot:
plt.figure()
grid = np.arange(2, 10, 0.01)
bl = ppe.extracted.baseline
everysecondpeak = ppe.extracted.model[::2]
plt.plot(ppe.x, ppe.y, "o")
for peak in everysecondpeak:
plt.plot(grid, bl.value(grid) + peak.value(grid))
plt.xlim(2, 10)
plt.show()
if __name__ == "__main__":
run()