-
Notifications
You must be signed in to change notification settings - Fork 6.4k
/
Copy pathsites.py
34 lines (29 loc) · 973 Bytes
/
sites.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
# https://deeplearningcourses.com/c/unsupervised-machine-learning-hidden-markov-models-in-python
# https://udemy.com/unsupervised-machine-learning-hidden-markov-models-in-python
# http://lazyprogrammer.me
# Create a Markov model for site data.
from __future__ import print_function, division
from future.utils import iteritems
import numpy as np
# transitions maps an (s, e) pair -- the two comma-separated fields of a row,
# presumably start page and end page (confirm against site_data.csv) -- to the
# number of rows in which that pair occurred.
transitions = {}
# row_sums maps s (the first field) to the number of rows that begin with it.
row_sums = {}

# collect counts
# A context manager closes the file handle deterministically instead of
# leaving it open until the interpreter happens to collect it.
with open('site_data.csv') as f:
    for line in f:
        line = line.rstrip()
        if not line:
            # Skip blank lines (e.g. a trailing empty line at end of file);
            # they would otherwise fail the two-field unpacking below.
            continue
        # Every non-blank row must contain exactly two comma-separated
        # fields; anything else raises ValueError here.
        s, e = line.split(',')
        # The 0. default keeps the counts as floats.
        transitions[(s, e)] = transitions.get((s, e), 0.) + 1
        row_sums[s] = row_sums.get(s, 0.) + 1
# normalize
# Divide every pair's count by the total recorded for the pair's first
# element. Only values of existing keys are replaced, so the dict does not
# change size while it is being iterated.
for pair, count in iteritems(transitions):
    origin, _target = pair
    total = row_sums[origin]
    transitions[pair] = count / total
# initial state distribution
# Report every entry whose first key element is the string '-1'
# (presumably the marker for "no previous page" -- confirm against the data),
# printing the second key element followed by the stored value.
print("initial state distribution:")
for (origin, target), prob in iteritems(transitions):
    if origin != '-1':
        continue
    print(target, prob)
# which page has the highest bounce?
# Print the stored value for every pair whose second element is 'B'
# (presumably "bounce", going by the message below). All such pairs are
# listed; picking the maximum is left to the reader.
for pair, rate in iteritems(transitions):
    page, outcome = pair
    if outcome != 'B':
        continue
    message = "bounce rate for %s: %s" % (page, rate)
    print(message)