-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathanalysis.py
109 lines (101 loc) · 4.83 KB
/
analysis.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
import numpy as np
def compute_angle_ACF(angles, max_tau, step=1, sample_origin='independent', type='angle'):
'''
Compute the autocorrelation function avg(cos[theta(t)-theta(tau+t)])tau
see: van der Spoel, D. & Berendsen, H. J. Molecular dynamics simulations of Leu-enkephalin in water and DMSO.
Biophys. J. 72, 2032–2041 (1997)
Parameters
----------
angles : np.array
M x N where M is the number of trajectory frames the angles are taken from
and N is the number of dihedral angles.
Angles are expected to be in degrees.
max_tau : int
The maximum time lag (in frames) to evaluate the function to.
step : int
The tau interval. If step = 2 then the correlation function is evaluated at tau = 0, 2, 4, 6, 8 .....
sample_origin : string
options are
'all' : function is evaluated for origin time (t) of every frame (or row) in angles
'independent' : function is evaluted for frames (or rows) equal to 0, tau, ...., ((M/tau)-1)tau
#TODO offer intermediate origins
type : string
options are
'angle' : inputs are expected to be angles (degrees). cos(angles[sample_origin] - angles[sample_origin+tau]) is computed.
'vector' : inputs are expected to be vectors (taken from the cross product of two bond orientation vectors)
dot(vector[sample_origin], vector[sample_origin+tau]) is computed
'''
angles = np.radians(angles)
# get the number of trajectory frames
n_rows = len(angles)
# create an empty array that will have a number of rows
# equal to the number of time lag samples
# and a number of columns equal to the number of angles
ACF = np.zeros((int(max_tau/step), angles.shape[1]))
# first row contains cos of 0 = 1 (angle minus itself i.e. no lag)
ACF[0,:] = np.ones(angles.shape[1])
if step > 1:
start=step
else:
start=1
# for each time lag starting from "step" frame lags to the max number of lags in "step" size
for i, tau in enumerate(range(start,max_tau+1,step)):
# make an array that will contain a number of rows equal to the length of trajectory minus the time lag
# because the final data point will be tau frames back from the final frame
# eg 10 total rows and a tau of 2, there will be 8 rows to average because the final data point is cos(row 8 - row 10)
if sample_origin == 'independent':
n_samples = int((n_rows/tau)-1)
diffs = np.zeros((n_samples, angles.shape[1]))
#TODO diffs = np.zeros((angles.shape[1]))
coef = tau
else:
n_samples = int(n_rows-tau) # check that final output doesn't have empty last row (-1 here also)
diffs = np.zeros((n_samples, angles.shape[1]))
#TODO diffs = np.zeros((angles.shape[1]))
coef = 1
# loop over n_samples origins
# TODO - just keep running sum and divide by n_samples to save memory
for record, t in enumerate(range(n_samples)):
sample_origin = t*coef
# get the cos of the difference between angle at time t and t+tau
if type == 'angle':
diffs[record,:] = np.cos(angles[sample_origin] - angles[sample_origin+tau])
elif type == 'vector':
# row wise dot product
diffs[record,:] = np.sum(angles[sample_origin]*angles[sample_origin+tau], axis=1)
#TODO diffs += np.sum(angles[sample_origin]*angles[sample_origin+tau], axis=1)
# after they've all been recorded, get the means for each angle and
# record them on the row of ACF corresponding to this iterations tau value
ACF[i,:] = diffs.mean(axis=0)
#TODO ACF[i,:] = diffs/n_samples
return ACF
def get_average_angle(angles):
'''
sum_i_from_1_to_N sin(a[i])
a = arctangent ---------------------------
sum_i_from_1_to_N cos(a[i])
'''
avg_angle = np.arctan(np.sin(angles.sum(axis=0)/np.cos(angles).sum(axis=0)))
return avg_angle
def scale_to_unit(vector):
'''
assuming 2d input shape
n x 3 (xyz)
'''
return vector/np.linalg.norm(vector,axis=1,keepdims=True)
def get_bond_vector_cross_products(ag,atoms=('H','N','CA')):
'''
ag : mda.Universe AtomGroup containing the atoms from the residues of interest.
'''
cross_products = []
residues = ag.residues
# get the row indices corresponding to the atoms
i = np.where(residues.atoms.names == atoms[0])[0]
j = np.where(residues.atoms.names == atoms[1])[0]
k = np.where(residues.atoms.names == atoms[2])[0]
# get the position vectors for the atoms
i,j,k = residues.atoms.positions[[i,j,k]].squeeze()
# cross product
n = scale_to_unit(np.cross(i-j, j-k))
cross_products.append(n)
return cross_products