forked from Kyle1993/Amazon-Kaggle
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathscore_thresholds.py
82 lines (79 loc) · 3.06 KB
/
score_thresholds.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
from sklearn.metrics import fbeta_score
import numpy as np
def getScore(y,p,x):
p2 = np.zeros_like(p)
for i in range(17):
p2[:, i] = (p[:, i] > x[i]).astype(np.int)
score = fbeta_score(y, p2, beta=2, average='samples')
return score
def optimise_f2_thresholds(y, p, verbose=True, resolution=100):
x = [0.2]*17
for i in range(17):
best_i2 = 0
best_score = 0
for i2 in range(resolution):
i2 /= resolution
x[i] = i2
score = getScore(y,p,x)
if score > best_score:
best_i2 = i2
best_score = score
x[i] = best_i2
if verbose:
print(i, best_i2, best_score)
return x
# def optimise_f2_thresholds(y, p, verbose=True, resolution=[100] * 17, base_x=None):
# if base_x is None:
# x = [0.2] * 17
# else:
# x = base_x
# p2 = (p > x).astype(np.int)
# _tp = ((p2 == 1) & (p2 == y)).astype('int')
# _fp = ((p2 == 1) & (p2 != y)).astype('int')
# _fn = ((p2 == 0) & (p2 != y)).astype('int')
# _tp_sum = np.sum(_tp, 1)
# _fp_sum = np.sum(_fp, 1)
# _fn_sum = np.sum(_fn, 1)
#
# for i in range(17):
# best_i2 = 0
# best_score = 0
# score_range = []
# for i2 in range(resolution[i]):
# i2 /= 1.0 * resolution[i]
# x[i] = i2
# tmp = (p[:, i] > x[i]).astype(np.int)
# if np.all(tmp == p2[:, i]):
# continue
# else:
# p2[:, i] = tmp
# _tp_sum = _tp_sum - _tp[:, i] + ((tmp == 1) & (tmp == y[:, i])).astype('int')
# _fp_sum = _fp_sum - _fp[:, i] + ((tmp == 1) & (tmp != y[:, i])).astype('int')
# _fn_sum = _fn_sum - _fn[:, i] + ((tmp == 0) & (tmp != y[:, i])).astype('int')
# _tp[:, i] = ((tmp == 1) & (tmp == y[:, i])).astype('int')
# _fp[:, i] = ((tmp == 1) & (tmp != y[:, i])).astype('int')
# _fn[:, i] = ((tmp == 0) & (tmp != y[:, i])).astype('int')
# _p = 1.0 * _tp_sum / (_tp_sum + _fp_sum)
# _r = 1.0 * _tp_sum / (_tp_sum + _fn_sum)
# score = np.mean(5.0 * _p * _r / (np.maximum(4 * _p + _r, 1e-15)))
# if score > best_score:
# best_i2 = i2
# best_score = score
# x[i] = best_i2
# tmp = (p[:, i] > x[i]).astype(np.int)
# if np.all(tmp == p2[:, i]):
# continue
# else:
# p2[:, i] = tmp
# _tp_sum = _tp_sum - _tp[:, i] + ((tmp == 1) & (tmp == y[:, i])).astype('int')
# _fp_sum = _fp_sum - _fp[:, i] + ((tmp == 1) & (tmp != y[:, i])).astype('int')
# _fn_sum = _fn_sum - _fn[:, i] + ((tmp == 0) & (tmp != y[:, i])).astype('int')
# _tp[:, i] = ((tmp == 1) & (tmp == y[:, i])).astype('int')
# _fp[:, i] = ((tmp == 1) & (tmp != y[:, i])).astype('int')
# _fn[:, i] = ((tmp == 0) & (tmp != y[:, i])).astype('int')
# p2[:, i] = tmp
#
# if verbose:
# print(i, best_i2, best_score)
# print('Train', best_score)
# return x