3
3
# @Date: 15-11-16
4
4
5
5
# @Last modified by: wendesi
6
- # @Last modified time: 15 -11-16
6
+ # @Last modified time: 17-11-16
7
7
8
8
import cv2
9
9
import time
18
18
sign_time_count = 0
19
19
20
20
class Sign (object ):
21
+ '''
22
+ 阈值分类器
23
+
24
+ 有两种方向,
25
+ 1) 当 x < v 时 y = 1
26
+ 2) 当 x > v 时 y = 1
27
+ v 是阈值轴
28
+
29
+ 因为是针对已经二值化后的MNIST数据集,所以v的取值只有3个 {0,1,2}
30
+ '''
31
+
21
32
def __init__ (self ,features ,labels ,w ):
22
- self .X = features
23
- self .Y = labels
24
- self .N = len (labels )
33
+ self .X = features # 训练数据特征
34
+ self .Y = labels # 训练数据的标签
35
+ self .N = len (labels ) # 训练数据大小
25
36
26
- self .w = w
37
+ self .w = w # 训练数据权值分布
27
38
28
- self .indexes = [0 ,1 ,2 ]
39
+ self .indexes = [0 ,1 ,2 ] # 阈值轴可选范围
29
40
30
41
def _train_less_than_ (self ):
42
+ '''
43
+ 寻找(x<v y=1)情况下的最优v
44
+ '''
45
+
31
46
index = - 1
32
47
error_score = 1000000
33
48
@@ -50,6 +65,10 @@ def _train_less_than_(self):
50
65
51
66
52
67
def _train_more_than_ (self ):
68
+ '''
69
+ 寻找(x>v y=1)情况下的最优v
70
+ '''
71
+
53
72
index = - 1
54
73
error_score = 1000000
55
74
@@ -106,21 +125,29 @@ def __init__(self):
106
125
pass
107
126
108
127
def _init_parameters_ (self ,features ,labels ):
109
- self .X = features
110
- self .Y = labels
128
+ self .X = features # 训练集特征
129
+ self .Y = labels # 训练集标签
111
130
112
- self .n = len (features [0 ])
113
- self .N = len (features )
114
- self .M = 60 # 分类器数目
131
+ self .n = len (features [0 ]) # 特征维度
132
+ self .N = len (features ) # 训练集大小
133
+ self .M = 10 # 分类器数目
115
134
116
- self .w = [1.0 / self .N ]* self .N
117
- self .alpha = []
118
- self .classifier = []
135
+ self .w = [1.0 / self .N ]* self .N # 训练集的权值分布
136
+ self .alpha = [] # 分类器系数 公式8.2
137
+ self .classifier = [] # (维度,分类器),针对当前维度的分类器
119
138
120
139
def _w_ (self ,index ,classifier ,i ):
140
+ '''
141
公式8.4 的未规范化部分(不含规范化因子 Zm,由调用方再除以 Z)
142
+ '''
143
+
121
144
return self .w [i ]* math .exp (- self .alpha [- 1 ]* self .Y [i ]* classifier .predict (self .X [i ][index ]))
122
145
123
146
def _Z_ (self ,index ,classifier ):
147
+ '''
148
+ 公式8.5
149
+ '''
150
+
124
151
Z = 0
125
152
126
153
for i in xrange (self .N ):
@@ -138,7 +165,7 @@ def train(self,features,labels):
138
165
time1 = time .time ()
139
166
map_time = 0
140
167
141
- best_classifier = (100000 ,None ,None ) #(误差率,分类器, 针对的特征)
168
+ best_classifier = (100000 ,None ,None ) #(误差率,针对的特征,分类器 )
142
169
for i in xrange (self .n ):
143
170
map_time -= time .time ()
144
171
features = map (lambda x :x [i ],self .X )
@@ -168,6 +195,7 @@ def train(self,features,labels):
168
195
169
196
Z = self ._Z_ (best_classifier [1 ],best_classifier [2 ])
170
197
198
+ # 更新训练集权值分布(公式8.4):未规范化权值除以规范化因子 Z
171
199
for i in xrange (self .N ):
172
200
self .w [i ] = self ._w_ (best_classifier [1 ],best_classifier [2 ],i )/ Z
173
201
0 commit comments