Skip to content

Commit 271399a

Browse files
Author: Lazy Programmer — committed "python 3" (1 parent: e4d5fb7; commit: 271399a)

14 files changed: +357 −286 lines changed

hmm_class/frost.py

+28-9
Original file line numberDiff line numberDiff line change
@@ -2,17 +2,36 @@
22
# https://udemy.com/unsupervised-machine-learning-hidden-markov-models-in-python
33
# http://lazyprogrammer.me
44
# Model and generate Robert Frost poems.
5+
from __future__ import print_function, division
6+
from future.utils import iteritems
7+
from builtins import range
8+
# Note: you may need to update your version of future
9+
# sudo pip install -U future
10+
11+
512

613
import numpy as np
714
import string
15+
import sys
16+
817

918
initial = {} # start of a phrase
1019
second_word = {}
1120
transitions = {}
1221

13-
def remove_punctuation(s):
22+
# unfortunately these work different ways
23+
def remove_punctuation_2(s):
1424
return s.translate(None, string.punctuation)
1525

26+
def remove_punctuation_3(s):
27+
return s.translate(str.maketrans('','',string.punctuation))
28+
29+
if sys.version.startswith('2'):
30+
remove_punctuation = remove_punctuation_2
31+
else:
32+
remove_punctuation = remove_punctuation_3
33+
34+
1635
def add2dict(d, k, v):
1736
if k not in d:
1837
d[k] = []
@@ -22,7 +41,7 @@ def add2dict(d, k, v):
2241
tokens = remove_punctuation(line.rstrip().lower()).split()
2342

2443
T = len(tokens)
25-
for i in xrange(T):
44+
for i in range(T):
2645
t = tokens[i]
2746
if i == 0:
2847
# measure the distribution of the first word
@@ -43,7 +62,7 @@ def add2dict(d, k, v):
4362

4463
# normalize the distributions
4564
initial_total = sum(initial.values())
46-
for t, c in initial.iteritems():
65+
for t, c in iteritems(initial):
4766
initial[t] = c / initial_total
4867

4968
def list2pdict(ts):
@@ -52,15 +71,15 @@ def list2pdict(ts):
5271
n = len(ts)
5372
for t in ts:
5473
d[t] = d.get(t, 0.) + 1
55-
for t, c in d.iteritems():
74+
for t, c in iteritems(d):
5675
d[t] = c / n
5776
return d
5877

59-
for t_1, ts in second_word.iteritems():
78+
for t_1, ts in iteritems(second_word):
6079
# replace list with dictionary of probabilities
6180
second_word[t_1] = list2pdict(ts)
6281

63-
for k, ts in transitions.iteritems():
82+
for k, ts in iteritems(transitions):
6483
transitions[k] = list2pdict(ts)
6584

6685
# generate 4 lines
@@ -69,14 +88,14 @@ def sample_word(d):
6988
p0 = np.random.random()
7089
# print "p0:", p0
7190
cumulative = 0
72-
for t, p in d.iteritems():
91+
for t, p in iteritems(d):
7392
cumulative += p
7493
if p0 < cumulative:
7594
return t
7695
assert(False) # should never get here
7796

7897
def generate():
79-
for i in xrange(4):
98+
for i in range(4):
8099
sentence =[]
81100

82101
# initial word
@@ -95,7 +114,7 @@ def generate():
95114
sentence.append(w2)
96115
w0 = w1
97116
w1 = w2
98-
print ' '.join(sentence)
117+
print(' '.join(sentence))
99118

100119
generate()
101120

hmm_class/generate_ht.py

+9-2
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,13 @@
22
# https://udemy.com/unsupervised-machine-learning-hidden-markov-models-in-python
33
# http://lazyprogrammer.me
44
# Generate discrete data from an HMM.
5+
from __future__ import print_function, division
6+
from future.utils import iteritems
7+
from builtins import range
8+
# Note: you may need to update your version of future
9+
# sudo pip install -U future
10+
11+
512
import numpy as np
613

714

@@ -16,7 +23,7 @@ def generate_sequence(N):
1623
s = np.random.choice(xrange(M), p=pi) # initial state
1724
x = np.random.choice(xrange(V), p=B[s]) # initial observation
1825
sequence = [x]
19-
for n in xrange(N-1):
26+
for n in range(N-1):
2027
s = np.random.choice(xrange(M), p=A[s]) # next state
2128
x = np.random.choice(xrange(V), p=B[s]) # next observation
2229
sequence.append(x)
@@ -25,7 +32,7 @@ def generate_sequence(N):
2532

2633
def main():
2734
with open('coin_data.txt', 'w') as f:
28-
for n in xrange(50):
35+
for n in range(50):
2936
sequence = generate_sequence(30)
3037
sequence = ''.join(symbol_map[s] for s in sequence)
3138
print sequence

hmm_class/hmm_classifier.py

+17-10
Original file line numberDiff line numberDiff line change
@@ -2,11 +2,19 @@
22
# https://udemy.com/unsupervised-machine-learning-hidden-markov-models-in-python
33
# http://lazyprogrammer.me
44
# Demonstrate how HMMs can be used for classification.
5+
from __future__ import print_function, division
6+
from future.utils import iteritems
7+
from builtins import range
8+
# Note: you may need to update your version of future
9+
# sudo pip install -U future
10+
11+
12+
513
import string
614
import numpy as np
715
import matplotlib.pyplot as plt
816

9-
from hmmd_theano import HMM
17+
from hmmd_theano2 import HMM
1018
from sklearn.utils import shuffle
1119
from nltk import pos_tag, word_tokenize
1220

@@ -18,14 +26,14 @@ def fit(self, X, Y, V):
1826
K = len(set(Y)) # number of classes - assume 0..K-1
1927
self.models = []
2028
self.priors = []
21-
for k in xrange(K):
29+
for k in range(K):
2230
# gather all the training data for this class
2331
thisX = [x for x, y in zip(X, Y) if y == k]
2432
C = len(thisX)
2533
self.priors.append(np.log(C))
2634

2735
hmm = HMM(5)
28-
hmm.fit(thisX, V=V, p_cost=0.1, print_period=1, learning_rate=1e-4, max_iter=100)
36+
hmm.fit(thisX, V=V, print_period=1, learning_rate=1e-2, max_iter=80)
2937
self.models.append(hmm)
3038

3139
def score(self, X, Y):
@@ -56,7 +64,7 @@ def get_data():
5664
for line in open(fn):
5765
line = line.rstrip()
5866
if line:
59-
print line
67+
print(line)
6068
# tokens = remove_punctuation(line.lower()).split()
6169
tokens = get_tags(line)
6270
if len(tokens) > 1:
@@ -69,26 +77,25 @@ def get_data():
6977
X.append(sequence)
7078
Y.append(label)
7179
count += 1
72-
print count
80+
print(count)
7381
if count >= 50:
7482
break
75-
print "Vocabulary:", word2idx.keys()
83+
print("Vocabulary:", word2idx.keys())
7684
return X, Y, current_idx
7785

7886

7987
def main():
8088
X, Y, V = get_data()
81-
# print "Finished loading data"
82-
print "len(X):", len(X)
83-
print "Vocabulary size:", V
89+
print("len(X):", len(X))
90+
print("Vocabulary size:", V)
8491
X, Y = shuffle(X, Y)
8592
N = 20 # number to test
8693
Xtrain, Ytrain = X[:-N], Y[:-N]
8794
Xtest, Ytest = X[-N:], Y[-N:]
8895

8996
model = HMMClassifier()
9097
model.fit(Xtrain, Ytrain, V)
91-
print "Score:", model.score(Xtest, Ytest)
98+
print("Score:", model.score(Xtest, Ytest))
9299

93100

94101
if __name__ == '__main__':

0 commit comments

Comments (0)