@@ -100,7 +100,7 @@ def _init_parameters_(self,features,labels):
100
100
101
101
self .n = len (features [0 ])
102
102
self .N = len (features )
103
- self .M = 10000 # 分类器数目
103
+ self .M = 100000 # 分类器数目
104
104
105
105
self .w = [1.0 / self .N ]* self .N
106
106
self .alpha = []
@@ -170,11 +170,34 @@ def predict(self,features):
170
170
return results
171
171
172
172
if __name__ == '__main__' :
173
- features = [[0 ],[1 ],[2 ],[3 ],[4 ],[5 ],[6 ],[7 ],[8 ],[9 ]]
174
- labels = [1 ,1 ,1 ,- 1 ,- 1 ,- 1 ,1 ,1 ,1 ,- 1 ]
173
+ print 'Start read data'
175
174
175
+ time_1 = time .time ()
176
176
177
+ raw_data = pd .read_csv ('../data/train_binary.csv' ,header = 0 )
178
+ data = raw_data .values
177
179
180
+ imgs = data [0 ::,1 ::]
181
+ labels = data [::,0 ]
178
182
183
+
184
+ # 选取 2/3 数据作为训练集, 1/3 数据作为测试集
185
+ train_features , test_features , train_labels , test_labels = train_test_split (imgs , labels , test_size = 0.33 , random_state = 23323 )
186
+
187
+ time_2 = time .time ()
188
+ print 'read data cost ' ,time_2 - time_1 ,' second' ,'\n '
189
+
190
+ print 'Start training'
179
191
ada = AdaBoost ()
180
- ada .train (features ,labels )
192
+ ada .train (train_features , train_labels )
193
+
194
+ time_3 = time .time ()
195
+ print 'training cost ' ,time_3 - time_2 ,' second' ,'\n '
196
+
197
+ print 'Start predicting'
198
+ test_predict = ada .predict (test_features )
199
+ time_4 = time .time ()
200
+ print 'predicting cost ' ,time_4 - time_3 ,' second' ,'\n '
201
+
202
+ score = accuracy_score (test_labels ,test_predict )
203
+ print "The accruacy socre is " , score
0 commit comments