
Commit 4df7935

Update file paths to the input directory

1 parent 1eecf70 commit 4df7935

File tree

15 files changed: +52 -52 lines changed

7 files renamed without changes.

output/03.DecisionTree/tree.pdf

-46 Bytes (binary file not shown)

src/python/03.DecisionTree/DTSklearn.py

+1-1
@@ -12,7 +12,7 @@ def createDataSet():
     ''' read in the data '''
     data = []
     labels = []
-    with open("testData/DT_data.txt") as ifile:
+    with open("input/03.DecisionTree/data.txt") as ifile:
         for line in ifile:
             # features: height, weight; label: fat/thin
             tokens = line.strip().split(' ')
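
Both the old and the new path are relative, so the scripts still have to be launched from the repository root. A minimal sketch (not part of this commit) of a cwd-independent lookup; the data_path helper and the assumed depth of DTSklearn.py below the repository root are illustrative:

import os

# Assumption: this file lives at src/python/03.DecisionTree/DTSklearn.py,
# i.e. three directories below the repository root.
REPO_ROOT = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "..", ".."))

def data_path(relative):
    # Resolve e.g. "input/03.DecisionTree/data.txt" against the repo root,
    # so the script works no matter where it is launched from.
    return os.path.join(REPO_ROOT, relative)

with open(data_path("input/03.DecisionTree/data.txt")) as ifile:
    for line in ifile:
        pass  # parse as before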

src/python/07.AdaBoost/adaboost.py

+44-44
@@ -258,47 +258,47 @@ def plotROC(predStrengths, classLabels):


 if __name__ == "__main__":
-    # we want to classify 5 points
-    dataArr, labelArr = loadSimpData()
-    print 'dataArr', dataArr, 'labelArr', labelArr
-
-    # D holds the initial weights: 1 is split evenly into 5 parts, so each starts at 0.2
-    # D is used to compute the weighted error: weightedError = D.T*errArr
-    D = mat(ones((5, 1))/5)
-    print 'D=', D.T
-
-    # bestStump, minError, bestClasEst = buildStump(dataArr, labelArr, D)
-    # print 'bestStump=', bestStump
-    # print 'minError=', minError
-    # print 'bestClasEst=', bestClasEst.T
-
-
-    # classifiers: weakClassArr
-    # accumulated classification estimates
-    weakClassArr, aggClassEst = adaBoostTrainDS(dataArr, labelArr, 9)
-    print '\nweakClassArr=', weakClassArr, '\naggClassEst=', aggClassEst.T
-
-    """
-    Observations:
-    class weights: the maximum value is the sum of the alphas; the minimum is its negation
-    sample weights: the smaller a sample's chance of being misclassified, the smaller its weight in D
-    """
-
-    # classification results on test data; observe aggClassEst, the final classification weights
-    print adaClassify([0, 0], weakClassArr).T
-    print adaClassify([[5, 5], [0, 0]], weakClassArr).T
-
-    # # horse colic dataset
-    # # training set
-    # dataArr, labelArr = loadDataSet("testData/AB_horseColicTraining2.txt")
-    # weakClassArr, aggClassEst = adaBoostTrainDS(dataArr, labelArr, 40)
-    # print weakClassArr, '\n-----\n', aggClassEst.T
-    # # compute the AUC under the ROC curve
-    # plotROC(aggClassEst.T, labelArr)
-    # # test set
-    # dataArrTest, labelArrTest = loadDataSet("testData/AB_horseColicTest2.txt")
-    # m = shape(dataArrTest)[0]
-    # predicting10 = adaClassify(dataArrTest, weakClassArr)
-    # errArr = mat(ones((m, 1)))
-    # # test: total samples, misclassified samples, error rate
-    # print m, errArr[predicting10 != mat(labelArrTest).T].sum(), errArr[predicting10 != mat(labelArrTest).T].sum()/m
+    # # we want to classify 5 points
+    # dataArr, labelArr = loadSimpData()
+    # print 'dataArr', dataArr, 'labelArr', labelArr
+
+    # # D holds the initial weights: 1 is split evenly into 5 parts, so each starts at 0.2
+    # # D is used to compute the weighted error: weightedError = D.T*errArr
+    # D = mat(ones((5, 1))/5)
+    # print 'D=', D.T
+
+    # # bestStump, minError, bestClasEst = buildStump(dataArr, labelArr, D)
+    # # print 'bestStump=', bestStump
+    # # print 'minError=', minError
+    # # print 'bestClasEst=', bestClasEst.T
+
+
+    # # classifiers: weakClassArr
+    # # accumulated classification estimates
+    # weakClassArr, aggClassEst = adaBoostTrainDS(dataArr, labelArr, 9)
+    # print '\nweakClassArr=', weakClassArr, '\naggClassEst=', aggClassEst.T
+
+    # """
+    # Observations:
+    # class weights: the maximum value is the sum of the alphas; the minimum is its negation
+    # sample weights: the smaller a sample's chance of being misclassified, the smaller its weight in D
+    # """
+
+    # # classification results on test data; observe aggClassEst, the final classification weights
+    # print adaClassify([0, 0], weakClassArr).T
+    # print adaClassify([[5, 5], [0, 0]], weakClassArr).T
+
+    # horse colic dataset
+    # training set
+    dataArr, labelArr = loadDataSet("input/07.AdaBoost/horseColicTraining2.txt")
+    weakClassArr, aggClassEst = adaBoostTrainDS(dataArr, labelArr, 40)
+    print weakClassArr, '\n-----\n', aggClassEst.T
+    # compute the AUC under the ROC curve
+    plotROC(aggClassEst.T, labelArr)
+    # test set
+    dataArrTest, labelArrTest = loadDataSet("input/07.AdaBoost/horseColicTest2.txt")
+    m = shape(dataArrTest)[0]
+    predicting10 = adaClassify(dataArrTest, weakClassArr)
+    errArr = mat(ones((m, 1)))
+    # test: total samples, misclassified samples, error rate
+    print m, errArr[predicting10 != mat(labelArrTest).T].sum(), errArr[predicting10 != mat(labelArrTest).T].sum()/m
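
For reference, the re-weighting arithmetic the comments above describe (weightedError = D.T*errArr, alpha as each stump's vote) works out as follows. A standalone sketch of one standard AdaBoost round, not code from this repository; the errArr values are invented for illustration:

from numpy import mat, ones, multiply, exp, log

# Initial weights: 1 split evenly over 5 samples, 0.2 each.
D = mat(ones((5, 1)) / 5)
# Suppose the best stump misclassifies only the third sample (1 = wrong).
errArr = mat([[0.0], [0.0], [1.0], [0.0], [0.0]])
weightedError = float(D.T * errArr)   # 0.2
alpha = 0.5 * log((1.0 - weightedError) / max(weightedError, 1e-16))  # ~0.693

# Grow the weights of misclassified samples, shrink the rest, renormalize.
expon = alpha * (2 * errArr - 1)      # +alpha if wrong, -alpha if right
D = multiply(D, exp(expon))
D = D / D.sum()
print(D.T)  # [[0.125 0.125 0.5 0.125 0.125]]: the missed sample now dominates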

src/python/09.RegTrees/regTrees.py

+7-7
@@ -290,37 +290,37 @@ def createForeCast(tree, testData, modelEval=regTreeEval):
     # print mat0, '\n-----------\n', mat1

     # # regression tree
-    # myDat = loadDataSet('testData/RT_data1.txt')
-    # # myDat = loadDataSet('testData/RT_data2.txt')
+    # myDat = loadDataSet('input/09.RegTrees/data1.txt')
+    # # myDat = loadDataSet('input/09.RegTrees/data2.txt')
     # # print 'myDat=', myDat
     # myMat = mat(myDat)
     # # print 'myMat=', myMat
     # myTree = createTree(myMat)
     # print myTree

     # # 1. pre-pruning: set the maximum error and the minimum sample count in advance
-    # myDat = loadDataSet('testData/RT_data3.txt')
+    # myDat = loadDataSet('input/09.RegTrees/data3.txt')
     # myMat = mat(myDat)
     # myTree = createTree(myMat, ops=(0, 1))
     # print myTree

     # # 2. post-pruning: use the test data to decide whether to merge subtrees of the fitted model
-    # myDatTest = loadDataSet('testData/RT_data3test.txt')
+    # myDatTest = loadDataSet('input/09.RegTrees/data3test.txt')
     # myMat2Test = mat(myDatTest)
     # myFinalTree = prune(myTree, myMat2Test)
     # print '\n\n\n-------------------'
     # print myFinalTree

     # # --------
     # # model tree
-    # myDat = loadDataSet('testData/RT_data4.txt')
+    # myDat = loadDataSet('input/09.RegTrees/data4.txt')
     # myMat = mat(myDat)
     # myTree = createTree(myMat, modelLeaf, modelErr)
     # print myTree

     # regression tree VS model tree VS linear regression
-    trainMat = mat(loadDataSet('testData/RT_bikeSpeedVsIq_train.txt'))
-    testMat = mat(loadDataSet('testData/RT_bikeSpeedVsIq_test.txt'))
+    trainMat = mat(loadDataSet('input/09.RegTrees/bikeSpeedVsIq_train.txt'))
+    testMat = mat(loadDataSet('input/09.RegTrees/bikeSpeedVsIq_test.txt'))
     # regression tree
     myTree1 = createTree(trainMat, ops=(1, 20))
     print myTree1
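
The "regression tree VS model tree VS linear regression" comparison is usually scored by correlating forecasts with the test targets. A hedged sketch of that follow-up, reusing createTree/createForeCast from this file; treating column 0 as input and column 1 as target, and modelTreeEval as the model-tree evaluator, are assumptions about this file's helpers:

from numpy import corrcoef

# Regression tree: myTree1 was built above with createTree(trainMat, ops=(1, 20)).
yHat1 = createForeCast(myTree1, testMat[:, 0])
print(corrcoef(yHat1, testMat[:, 1], rowvar=0)[0, 1])  # R for the regression tree

# Model tree on the same split (modelLeaf/modelErr appear earlier in this file).
myTree2 = createTree(trainMat, modelLeaf, modelErr, ops=(1, 20))
yHat2 = createForeCast(myTree2, testMat[:, 0], modelTreeEval)
print(corrcoef(yHat2, testMat[:, 1], rowvar=0)[0, 1])  # R for the model tree

The model whose R value is closer to 1 fits this data better.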
