forked from diffpy/diffpy.srmise
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathbase.py
132 lines (104 loc) · 4.75 KB
/
base.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
#!/usr/bin/env python
##############################################################################
#
# SrMise by Luke Granlund
# (c) 2014 trustees of the Michigan State University
# (c) 2024 trustees of Columbia University in the City of New York
# All rights reserved.
#
# File coded by: Luke Granlund
#
# See LICENSE.txt for license information.
#
##############################################################################
#
# Routines for analyzing and comparing the quality of models to the atomic
# pair distribution function. The total number of intrinsic peaks in the PDF
# is on the order of the number of atoms in the sample squared, and these
# overlap, so developing a model to the entire PDF is a terribly
# underconstrained problem. There are two primary considerations to make when
# interpreting peaks extracted from the PDF as significantly favored
# interatomic distances (i.e. few or no other interatomic distances appear
# between the maxima of an extracted peak and its neighbors):
# 1. The more ordered the system the more likely this interpretation is to be
# valid. In contrast, it is not appropriate to interpret peaks extracted
# from the PDF of amorphous structures in this way.
# 2. The number of overlapping peaks increases roughly as r^2, so peaks
# extracted at low r are more likely to correspond with this interpretation
# than those at high r.
# Several information theoretic methods are provided that penalize
# overfitting.
#
# Errors in the PDF are correlated within a short range, but at present all
# data points are considered independently distributed. Any provided errors
# on the PDF are also not yet considered.
#
# Model selection criteria:
# Akaike information criterion (AIC)
# Akaike information criterion w/ small sample correction (AICc)
#
#
########################################################################
import logging
import numpy as np
logger = logging.getLogger("diffpy.srmise")
class ModelEvaluator:
    """Class for evaluating the quality of a fit.

    Comparison between different models of the same type is defined so that
    better models are 'greater than' worse models, regardless of whether the
    underlying statistic is maximized or minimized."""

    def __init__(self, method, higher_is_better):
        """Initialize the evaluator.

        Parameters
        ----------
        method : str
            Name of the evaluation method (e.g. "AIC", "AICc").
        higher_is_better : bool
            True if a larger statistic indicates a better model, False if a
            smaller statistic does (as for AIC/AICc).
        """
        self.method = method
        self.higher_is_better = higher_is_better
        # Quality statistic for the model; must be set before any comparison.
        self.stat = None
        # Chi-squared value of the fit; set by chi_squared().
        self.chisq = None
        return

    def __lt__(self, other):
        """Return True if self is a worse model than other."""
        assert self.method == other.method  # Comparison between same types required
        assert self.stat is not None and other.stat is not None  # The statistic must already be calculated

        # BUG FIX: previously tested "higher_is_better is not None", which is
        # always True for a boolean flag, so the lower-is-better branch was
        # unreachable and minimized statistics compared backwards.
        if self.higher_is_better:
            return self.stat < other.stat
        else:
            return other.stat < self.stat

    def __le__(self, other):
        """Return True if self is a worse or equally good model as other."""
        assert self.method == other.method  # Comparison between same types required
        assert self.stat is not None and other.stat is not None  # The statistic must already be calculated

        # BUG FIX: same unreachable-branch defect as __lt__; test flag truth.
        if self.higher_is_better:
            return self.stat <= other.stat
        else:
            return other.stat <= self.stat

    def __eq__(self, other):
        """Return True if both models are of equal quality."""
        assert self.method == other.method  # Comparison between same types required
        assert self.stat is not None and other.stat is not None  # The statistic must already be calculated
        return self.stat == other.stat

    def __ne__(self, other):
        """Return True if the models are of unequal quality."""
        assert self.method == other.method  # Comparison between same types required
        assert self.stat is not None and other.stat is not None  # The statistic must already be calculated
        return self.stat != other.stat

    def __gt__(self, other):
        """Return True if self is a better model than other."""
        assert self.method == other.method  # Comparison between same types required
        assert self.stat is not None and other.stat is not None  # The statistic must already be calculated

        # BUG FIX: same unreachable-branch defect as __lt__; test flag truth.
        if self.higher_is_better:
            return self.stat > other.stat
        else:
            return other.stat > self.stat

    def __ge__(self, other):
        """Return True if self is a better or equally good model as other."""
        assert self.method == other.method  # Comparison between same types required
        assert self.stat is not None and other.stat is not None  # The statistic must already be calculated

        # BUG FIX: same unreachable-branch defect as __lt__; test flag truth.
        if self.higher_is_better:
            return self.stat >= other.stat
        else:
            return other.stat >= self.stat

    def chi_squared(self, expected, observed, error):
        """Calculate and store the chi-squared statistic.

        Parameters
        ----------
        expected : numpy.ndarray
            Model values.
        observed : numpy.ndarray
            Measured values.
        error : numpy.ndarray
            Uncertainty of each observation; must be nonzero elementwise.

        Returns
        -------
        float
            The chi-squared value, also stored in self.chisq.
        """
        self.chisq = np.sum((expected - observed) ** 2 / error**2)
        return self.chisq


# end of class ModelEvaluator