Merge remote-tracking branch 'upstream/master' into python-2.7

DataMonk2017 · DataMonk2017 · commit 2f6c0be39cdc · 2018-02-25T22:09:54.000-08:00
diff --git a/docs/4.朴素贝叶斯.md b/docs/4.朴素贝叶斯.md
@@ -548,7 +548,7 @@ def spamTest():
 收集数据: 从 RSS 源收集内容，这里需要对 RSS 源构建一个接口
 准备数据: 将文本文件解析成词条向量
 分析数据: 检查词条确保解析的正确性
-训练算法: 使用我们之前简历的 trainNB0() 函数
+训练算法: 使用我们之前建立的 trainNB0() 函数
 测试算法: 观察错误率，确保分类器可用。可以修改切分程序，以降低错误率，提高分类结果
 使用算法: 构建一个完整的程序，封装所有内容。给定两个 RSS 源，该程序会显示最常用的公共词
 ```
@@ -601,7 +601,7 @@ def textParse(bigString):
 
 > 分析数据: 检查词条确保解析的正确性
 
-> 训练算法: 使用我们之前简历的 trainNB0() 函数
+> 训练算法: 使用我们之前建立的 trainNB0() 函数
 
 ```python
 def trainNB0(trainMatrix, trainCategory):
diff --git a/docs/5.Logistic回归.md b/docs/5.Logistic回归.md
@@ -237,8 +237,8 @@ def gradAscent(dataMatIn, classLabels):
     # weights 代表回归系数， 此处的 ones((n,1)) 创建一个长度和特征数相同的矩阵，其中的数全部都是 1
     weights = ones((n,1))
     for k in range(maxCycles):              #heavy on matrix operations
-        # m*3 的矩阵 * 3*1 的单位矩阵 ＝ m*1的矩阵
-        # 那么乘上单位矩阵的意义，就代表：通过公式得到的理论值
+        # m*3 的矩阵 * 3*1 的矩阵 ＝ m*1的矩阵
+        # 那么乘上矩阵的意义，就代表：通过公式得到的理论值
         # 参考地址： 矩阵乘法的本质是什么？ https://www.zhihu.com/question/21351965/answer/31050145
         # print 'dataMatrix====', dataMatrix 
         # print 'weights====', weights
@@ -255,6 +255,7 @@ def gradAscent(dataMatIn, classLabels):
 大家看到这儿可能会有一些疑惑，就是，我们在迭代中更新我们的回归系数，后边的部分是怎么计算出来的？为什么会是 alpha * dataMatrix.transpose() * error ？因为这就是我们所求的梯度，也就是 f(w) 对 w 求一阶导数。具体推导如下:
 
 ![f(w)对w求一阶导数](../images/5.Logistic/LR_21.png)
+可参考: http://blog.csdn.net/achuo/article/details/51160101
 
 > 测试算法: 使用 Logistic 回归进行分类
 
diff --git a/docs/6.支持向量机.md b/docs/6.支持向量机.md
@@ -63,7 +63,7 @@ Support Vector Machines: Slide 12 Copyright © 2001, 2003, Andrew W. Moore Why M
 
 * 分隔超平面`函数间距`:  \\(y(x)=w^Tx+b\\)
 * 分类的结果： \\(f(x)=sign(w^Tx+b)\\)  (sign表示>0为1，<0为-1，=0为0) 
-* 点到超平面的`几何间距`: \\(d(x)=(w^Tx+b)/||w||\\)  （||w||表示w矩阵的二范式=> \\(\sqrt{w*w^T}\\), 点到超平面的距离也是类似的）
+* 点到超平面的`几何间距`: \\(d(x)=(w^Tx+b)/||w||\\)  （||w||表示w矩阵的二范数=> \\(\sqrt{w^T*w}\\), 点到超平面的距离也是类似的）
 
 ![点到直线的几何距离](../images/6.SVM/SVM_4_point2line-distance.jpg)
 
diff --git a/src/python/4.NaiveBayes/bayes.py b/src/python/4.NaiveBayes/bayes.py
@@ -23,7 +23,7 @@ def loadDataSet():
     postingList = [['my', 'dog', 'has', 'flea', 'problems', 'help', 'please'], #[0,0,1,1,1......]
                    ['maybe', 'not', 'take', 'him', 'to', 'dog', 'park', 'stupid'],
                    ['my', 'dalmation', 'is', 'so', 'cute', 'I', 'love', 'him'],
-                   ['stop', 'posting', 'stupid', 'worthless', 'gar e'],
+                   ['stop', 'posting', 'stupid', 'worthless', 'garbage'],
                    ['mr', 'licks', 'ate', 'my', 'steak', 'how', 'to', 'stop', 'him'],
                    ['quit', 'buying', 'worthless', 'dog', 'food', 'stupid']]
     classVec = [0, 1, 0, 1, 0, 1]  # 1 is abusive, 0 not
@@ -321,7 +321,7 @@ def localWords(feed1,feed0):
     for docIndex in trainingSet:
         trainMat.append(bagOfWords2VecMN(vocabList,docList[docIndex]))
         trainClasses.append(classList[docIndex])
-    p0V,p1V,pSpam=trainNBO(array(trainMat),array(trainClasses))
+    p0V,p1V,pSpam=trainNB0(array(trainMat),array(trainClasses))
     errorCount=0
     for docIndex in testSet:
         wordVector=bagOfWords2VecMN(vocabList,docList[docIndex])