Skip to content

Commit e8b0422

Browse files
committed
adaboost with threshold version complete
1 parent 2bc559f commit e8b0422

7 files changed

+307
-135
lines changed

.idea/workspace.xml

+142-128
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

AdaBoost/adaboost.py

+40-7
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
# @Last modified time: 15-11-16
77

88
import math
9+
import logging
910

1011
class Sign(object):
1112
def __init__(self,features,labels,w):
@@ -15,15 +16,19 @@ def __init__(self,features,labels,w):
1516

1617
self.w = w
1718

19+
mmax = max(self.X)
20+
self.indexes = self.X[:]
21+
self.indexes.append(mmax+1)
22+
1823
def _train_less_than_(self):
1924
index = -1
2025
error_score = 1000000
2126

22-
for i in xrange(self.N+1):
27+
for i in self.indexes:
2328
score = 0
2429
for j in xrange(self.N):
2530
val = -1
26-
if j<i:
31+
if self.X[j]<i:
2732
val = 1
2833

2934
if val*self.Y[j]<0:
@@ -41,11 +46,11 @@ def _train_more_than_(self):
4146
index = -1
4247
error_score = 1000000
4348

44-
for i in xrange(self.N+1):
49+
for i in self.indexes:
4550
score = 0
4651
for j in xrange(self.N):
4752
val = 1
48-
if j<i:
53+
if self.X[j]<i:
4954
val = -1
5055

5156
if val*self.Y[j]<0:
@@ -95,7 +100,7 @@ def _init_parameters_(self,features,labels):
95100

96101
self.n = len(features[0])
97102
self.N = len(features)
98-
self.M = 100 # 分类器数目
103+
self.M = 10000 # 分类器数目
99104

100105
self.w = [1.0/self.N]*self.N
101106
self.alpha = []
@@ -117,6 +122,7 @@ def train(self,features,labels):
117122
self._init_parameters_(features,labels)
118123

119124
for times in xrange(self.M):
125+
logging.debug('iterater %d' % times)
120126

121127
best_classifier = (100000,None,None) #(误差率,分类器,针对的特征)
122128
for i in xrange(self.n):
@@ -125,17 +131,44 @@ def train(self,features,labels):
125131
error_score = classifier.train()
126132

127133
if error_score < best_classifier[0]:
128-
best_classifier = (error_score,classifier,i)
134+
best_classifier = (error_score,i,classifier)
129135

130136
em = best_classifier[0]
131-
self.alpha.append(1/2*math.log((1-em)/em))
137+
if em==0:
138+
self.alpha.append(100)
139+
else:
140+
self.alpha.append(0.5*math.log((1-em)/em))
141+
132142
self.classifier.append(best_classifier[1:])
133143

134144
Z = self._Z_(best_classifier[1],best_classifier[2])
135145

136146
for i in xrange(self.N):
137147
self.w[i] = self._w_(best_classifier[1],best_classifier[2],i)/Z
138148

149+
def _predict_(self,feature):
    """Classify one sample by the weighted vote of the stored weak classifiers.

    Args:
        feature: indexable sample; feature[k] is the value handed to a weak
            classifier that was stored together with feature index k.

    Returns:
        1 when the alpha-weighted sum of the weak predictions is positive,
        otherwise -1 (a sum of exactly zero therefore yields -1).
    """
    result = 0.0

    # Walk the stored weights and (feature index, classifier) entries in
    # lockstep instead of counting up to self.M: the configured number of
    # rounds may exceed what is actually stored, which used to raise
    # IndexError, and zip() also avoids the Python-2-only xrange().
    for alpha, entry in zip(self.alpha, self.classifier):
        index, classifier = entry[0], entry[1]
        result += alpha*classifier.predict(feature[index])

    if result>0:
        return 1
    return -1


def predict(self,features):
    """Classify every sample in features.

    Args:
        features: iterable of samples, each accepted by _predict_.

    Returns:
        List with one label (1 or -1) per sample, in input order.
    """
    return [self._predict_(feature) for feature in features]
171+
139172
if __name__ == '__main__':
140173
features = [[0],[1],[2],[3],[4],[5],[6],[7],[8],[9]]
141174
labels = [1,1,1,-1,-1,-1,1,1,1,-1]

AdaBoost/adaboost.pyc

5.02 KB
Binary file not shown.

AdaBoost/adaboost_fakedata.py

+23
Original file line numberDiff line numberDiff line change
@@ -4,3 +4,26 @@
44
55
# @Last modified by: wendesi
66
# @Last modified time: 15-11-16
7+
8+
import logging
9+
10+
from generate_dataset import *
11+
from adaboost import AdaBoost
12+
13+
from sklearn.metrics import accuracy_score
14+
15+
if __name__ == '__main__':
16+
logger = logging.getLogger()
17+
logger.setLevel(logging.DEBUG)
18+
19+
train_features, train_labels, test_features, test_labels = generate_dataset(200)
20+
21+
ada = AdaBoost()
22+
ada.train(train_features,train_labels)
23+
24+
print 'end train'
25+
test_predict = ada.predict(test_features)
26+
27+
28+
score = accuracy_score(test_labels,test_predict)
29+
print "ada boost the accruacy socre is ", score

AdaBoost/generate_dataset.py

+101
Original file line numberDiff line numberDiff line change
@@ -134,3 +134,104 @@ def generate_dataset(size, noisy = False, visualization = True):
134134
# X,y,w = mk_data(size,False)
135135
#
136136
# data_visualization(X,y)
137+
# encoding=utf8
138+
139+
140+
141+
142+
143+
144+
145+
146+
147+
148+
149+
150+
151+
152+
153+
154+
155+
156+
157+
158+
159+
160+
161+
162+
163+
164+
165+
166+
167+
168+
169+
170+
171+
172+
173+
174+
175+
176+
177+
178+
# #encoding=utf-8
179+
# # @Author: wendesi
180+
# # @Date: 15-11-16
181+
# # @Email: [email protected]
182+
# # @Last modified by: wendesi
183+
# # @Last modified time: 15-11-16
184+
#
185+
#
186+
#
187+
#
188+
# import random
189+
#
190+
# def generate(size,point,radius,label):
191+
# results = []
192+
#
193+
# for i in xrange(size):
194+
# result = [label]
195+
#
196+
# for j in xrange(len(point)):
197+
# x = point[j]+random.randint(-radius,radius)
198+
# result.append(x)
199+
#
200+
# results.append(result)
201+
#
202+
# return results
203+
#
204+
#
205+
# def generate_dataset(size):
206+
# class1 = generate(size/2,(0,0),10,-1)
207+
# class2 = generate(size-size/2,(21,21),10,1)
208+
#
209+
# class_ = class1
210+
# class_.extend(class2)
211+
#
212+
# random.shuffle(class_)
213+
#
214+
# split_point = int(float(size)*0.333)
215+
# testset = class_[:split_point]
216+
# trainset = class_[split_point:]
217+
#
218+
# trainset_features = map(lambda x:x[1:],trainset)
219+
# trainset_labels = map(lambda x:x[0],trainset)
220+
#
221+
# testset_features = map(lambda x:x[1:],testset)
222+
# testset_labels = map(lambda x:x[0],testset)
223+
#
224+
# return trainset_features,trainset_labels,testset_features,testset_labels
225+
#
226+
#
227+
# if __name__ == '__main__':
228+
#
229+
# size = 1000
230+
# generate_dataset(size)
231+
#
232+
# # generate_dataset
233+
# # print sign
234+
# # sign = np.vectorize(sign)
235+
# # X,y,w = mk_data(size,False)
236+
# #
237+
# # data_visualization(X,y)

AdaBoost/generate_dataset.pyc

3.69 KB
Binary file not shown.

svm/generate_dataset.py

+1
Original file line numberDiff line numberDiff line change
@@ -134,3 +134,4 @@ def generate_dataset(size, noisy = False, visualization = True):
134134
# X,y,w = mk_data(size,False)
135135
#
136136
# data_visualization(X,y)
137+
# encoding=utf8

0 commit comments

Comments
 (0)