-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathnode.py
50 lines (38 loc) · 1.97 KB
/
node.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
import pandas as pd
import numpy as np
from prob_func import ProbFunc
from intermediate_results import IntermediateResults
class Node(object):
    """A random variable together with its parents.

    A node holds two joint distributions, both ``ProbFunc`` objects, so that
    the conditional distribution can be evaluated as

        P(X | par_X) = P(X, par_X) / P(par_X)

    ``joint_dist`` is built over the parents plus the node's own feature and
    ``par_dist`` over the parents only.
    """

    def __init__(self, X, feature_name, feature_parents, intermediate_results, features_type=None):
        """Store the training data and build the two ``ProbFunc`` objects.

        Args:
            X: Training data. Must be a ``pd.DataFrame``; a ``pd.Series`` is
                also accepted when ``feature_parents`` is empty.
            feature_name: Name of the feature this node represents.
            feature_parents: List of the parent feature names (may be empty).
                It is concatenated with ``[feature_name]``, so it has to be
                a list.
            intermediate_results: Object forwarded unchanged to both
                ``ProbFunc`` constructors.
            features_type: Mapping forwarded unchanged to both ``ProbFunc``
                constructors (presumably feature name -> type code; confirm
                against ``ProbFunc``). Defaults to a new empty dict.

        Raises:
            AssertionError: If ``X`` is not of an accepted pandas type.
        """
        # A literal ``{}`` default would be one dict shared by every Node
        # created without this argument; build a fresh one per instance.
        if features_type is None:
            features_type = {}

        if len(feature_parents) == 0:
            assert isinstance(X, (pd.Series, pd.DataFrame)), "expecting a series or a dataframe"
        else:
            assert isinstance(X, pd.DataFrame), "expecting a dataframe"

        self.training_df = X
        self.features_type = features_type
        self.feature_name = feature_name
        self.feature_parents = feature_parents
        self.intermediate_results = intermediate_results

        # Denominator P(par_X): distribution over the parents only.
        self.par_dist = ProbFunc(self.training_df, self.feature_parents,
                                 self.intermediate_results, self.features_type)
        # Numerator P(X, par_X): distribution over the parents plus this feature.
        self.joint_dist = ProbFunc(self.training_df, self.feature_parents + [self.feature_name],
                                   self.intermediate_results, self.features_type)

    def fit(self):
        """Fit the parent distribution, then the joint distribution."""
        self.par_dist.fit()
        self.joint_dist.fit()

    def compute_ll(self, X, discrete_val=-1):
        """Return ``joint_dist.compute_ll`` minus ``par_dist.compute_ll``.

        In log space this difference is the division
        P(X, par_X) / P(par_X), i.e. the conditional of the node given its
        parents.

        Args:
            X: Data to evaluate; forwarded unchanged to both distributions.
            discrete_val: Forwarded unchanged to both distributions
                (semantics are defined by ``ProbFunc.compute_ll``).

        Returns:
            The difference of the two ``compute_ll`` results.
        """
        joint_ll = self.joint_dist.compute_ll(X, discrete_val)
        parents_ll = self.par_dist.compute_ll(X, discrete_val)
        return joint_ll - parents_ll
if __name__ == '__main__':
    # Smoke test: build a node for feature "a" with parents "b" and "c" on a
    # small 9-row frame, fit it, and print the result of compute_ll on the
    # same data.
    test = pd.DataFrame({
        "a": np.arange(9),
        "b": [0 if i < 5 else 1 for i in range(9)],
        "c": np.random.randn(9),
    })
    prep_res = IntermediateResults(test)
    feature_name = "a"
    feature_parents = ["b", "c"]
    features_type = {"a": "c", "b": "d", "c": "c"}
    node = Node(test, feature_name, feature_parents, prep_res, features_type)
    node.fit()
    # Call form of print: the bare print statement is a SyntaxError on
    # Python 3, while a single parenthesised argument prints identically on
    # Python 2.
    print(node.compute_ll(test))