Skip to content

Commit a7ee33f

Browse files
2 parents c35db2e + 1e59a11 commit a7ee33f

File tree

8 files changed

+54
-10
lines changed

8 files changed

+54
-10
lines changed

docs/11.使用Apriori算法进行关联分析.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,7 @@ Apriori 算法流程步骤:
6060
* 分析数据:使用任意方法。
6161
* 训练数据:使用Apriori算法来找到频繁项集。
6262
* 测试算法:不需要测试过程。
63-
* 使用算法:用语发现频繁项集以及物品之间的关联规则
63+
* 使用算法:用于发现频繁项集以及物品之间的关联规则
6464
```
6565

6666
## Apriori 算法的使用

src/py2.x/ML/15.BigData_MapReduce/proximalSVM.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,8 +6,12 @@
66
@author: Peter/ApacheCN-xy/片刻
77
《机器学习实战》更新地址:https://github.com/apachecn/MachineLearning
88
'''
9+
import base64
10+
import pickle
11+
912
import numpy
1013

14+
1115
def map(key, value):
1216
# input key= class for one training example, e.g. "-1.0"
1317
classes = [float(item) for item in key.split(",")] # e.g. [-1.0]

src/py2.x/ML/3.DecisionTree/DecisionTree.py

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -327,6 +327,9 @@ def fishTest():
327327
print myTree
328328
# [1, 1]表示要取的分支上的节点位置,对应的结果值
329329
print classify(myTree, labels, [1, 1])
330+
331+
# 获得树的高度
332+
print get_tree_height(myTree)
330333

331334
# 画图可视化展现
332335
dtPlot.createPlot(myTree)
@@ -353,6 +356,32 @@ def ContactLensesTest():
353356
print lensesTree
354357
# 画图可视化展现
355358
dtPlot.createPlot(lensesTree)
359+
360+
361+
def get_tree_height(tree):
    """
    Desc:
        Recursively compute the height of a decision tree.
    Args:
        tree -- a decision tree: either a nested dict of the form
                {feature_label: {feature_value: subtree, ...}} or a
                leaf class label (any non-dict value)
    Returns:
        int -- the height of the tree; a single leaf counts as height 1
    """

    # A non-dict node is a leaf label, contributing height 1.
    if not isinstance(tree, dict):
        return 1

    # Each internal node is {feature_label: {feature_value: subtree, ...}}.
    # Wrap in list(...) so this works on Python 3, where dict.values()
    # returns a non-subscriptable view (the original tree.values()[0]
    # raises TypeError there); on Python 2 the behavior is unchanged.
    child_trees = list(tree.values())[0].values()

    # Walk the subtrees and keep the maximum subtree height.
    max_height = 0
    for child_tree in child_trees:
        child_tree_height = get_tree_height(child_tree)

        if child_tree_height > max_height:
            max_height = child_tree_height

    # This node adds one level on top of the tallest subtree.
    return max_height + 1
356385

357386

358387
if __name__ == "__main__":

src/py3.x/16.RecommenderSystems/test_evaluation_model.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,6 @@
1+
import math
2+
import random
3+
14

25
def SplitData(data, M, k, seed):
36
test = []
@@ -69,4 +72,3 @@ def Popularity(train, test, N):
6972
n += 1
7073
ret /= n * 1.0
7174
return ret
72-

src/py3.x/16.RecommenderSystems/test_基于物品.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,6 @@
1+
import math
2+
from operator import itemgetter
3+
14

25
def ItemSimilarity1(train):
36
#calculate co-rated users between items
@@ -28,7 +31,7 @@ def ItemSimilarity2(train):
2831
N[i] += 1
2932
for j in users:
3033
if i == j:
31-
continue
34+
continue
3235
C[i][j] += 1 / math.log(1 + len(items) * 1.0)
3336

3437
#calculate final similarity matrix W
@@ -60,5 +63,3 @@ def Recommendation2(train, user_id, W, K):
6063
rank[j].weight += pi * wj
6164
rank[j].reason[i] = pi * wj
6265
return rank
63-
64-

src/py3.x/16.RecommenderSystems/test_基于用户.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,6 @@
1+
import math
2+
from operator import itemgetter
3+
14

25
def UserSimilarity1(train):
36
W = dict()
@@ -75,4 +78,3 @@ def Recommend(user, train, W):
7578
continue
7679
rank[i] += wuv * rvi
7780
return rank
78-

src/py3.x/ML/15.BigData_MapReduce/proximalSVM.py

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -6,8 +6,12 @@
66
@author: Peter/ApacheCN-xy/片刻
77
《机器学习实战》更新地址:https://github.com/apachecn/MachineLearning
88
'''
9+
import base64
10+
import pickle
11+
912
import numpy
1013

14+
1115
def map(key, value):
1216
# input key= class for one training example, e.g. "-1.0"
1317
classes = [float(item) for item in key.split(",")] # e.g. [-1.0]
@@ -19,16 +23,16 @@ def map(key, value):
1923

2024
# create matrix E and vector e
2125
e = numpy.matrix(numpy.ones(len(A)).reshape(len(A), 1))
22-
E = numpy.matrix(numpy.append(A, -e, axis=1))
26+
E = numpy.matrix(numpy.append(A, -e, axis=1))
2327

2428
# create a tuple with the values to be used by reducer
2529
# and encode it with base64 to avoid potential trouble with '\t' and '\n' used
2630
# as default separators in Hadoop Streaming
27-
producedvalue = base64.b64encode(pickle.dumps( (E.T*E, E.T*D*e))
31+
producedvalue = base64.b64encode(pickle.dumps((E.T*E, E.T*D*e)))
2832

2933
# note: a single constant key "producedkey" sends to only one reducer
3034
# somewhat "atypical" due to low degree of parallism on reducer side
31-
print "producedkey\t%s" % (producedvalue)
35+
print("producedkey\t%s" % (producedvalue))
3236

3337
def reduce(key, values, mu=0.1):
3438
sumETE = None
@@ -52,4 +56,4 @@ def reduce(key, values, mu=0.1):
5256
# note: omega = result[:-1] and gamma = result[-1]
5357
# but printing entire vector as output
5458
result = sumETE.I*sumETDe
55-
print "%s\t%s" % (key, str(result.tolist()))
59+
print("%s\t%s" % (key, str(result.tolist())))

src/py3.x/ML/8.Regression/regression.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -582,3 +582,5 @@ def regression5():
582582
# regression3()
583583
# regression4()
584584
# regression5()
585+
pass
586+

0 commit comments

Comments
 (0)