transformation_in_x.py
import tensorflow as tf
import numpy as np
import math
@tf.function
def apply_transformation_in_x(x, shift, shift2, K, K2, T):
    if T == 'tabular_scaling':
        return _tabular_scaling(x, shift)
    elif T == 'image_rotation':
        return _image_rotation(x, shift, K)
    elif T == 'missing_values':
        return _missing_values(x, shift, K)
    elif T == 'proportional_noise':
        return _proportional_noise(x, shift, shift2, K2)
    else:
        return _translate_only(x, shift)


def _translate_only(x, shift):
    pass

@tf.function
def _tabular_scaling(x, shift):
    # Per-shift diagonal scaling of the 8 tabular features.
    if shift == 0: diagonal = [1., 1., 1., 1., 1., 1., 1., 1.]
    elif shift == 1: diagonal = [1., 1.6, 1.4, 1., 1., 1., 1., 1.]
    elif shift == 2: diagonal = [1., 1., 1., 1., 1.5, 1., 1., 1.]
    elif shift == 3: diagonal = [1., 1., 1., 1., 1., 1., 1.3, 1.3]
    elif shift == 7: diagonal = [1., 1., 1., 1., 1., 1., 1.70, 1.70]  # test for starting on a non-anchor K
    else: diagonal = [1., 1., 1., 1., 1., 1., 1., 1.]
    return x * diagonal

# The data is in the interval [0, 1], so -1 is a valid "NULL" marker.
# To train on a benchmark, just remove the rows with missing values, train,
# then predict the completion for the evaluation.
@tf.function
def _missing_values(x, shift, K):
    if int(shift) == 0:
        return x
    else:
        x_missing = tf.tensor_scatter_nd_update(x, [[shift - 1]], [-1])
        return x_missing

# The data is in the interval [0, 1], so after the noise it must stay in the same range.
@tf.function
def _proportional_noise(x, shift, shift2, K2):
    if int(shift) == 0:
        return x
    else:
        if shift2 < 5:
            a = 0.25 * (5 - float(shift2))
        else:
            a = 0.25 * (float(shift2) - 4)
        # half of K2 for negative noise and half for positive
        if int(shift2) <= int(K2 / 2):
            noise = -1 * (x[shift - 1] * a)
        else:
            noise = x[shift - 1] * a
        x_noisy = tf.tensor_scatter_nd_add(x, [[shift - 1]], [noise])
        return x_noisy

@tf.function
def include_errors_at_random(x, K, factor):
    # Draw one error category per entry; category 0 means "no error".
    aux = np.ones((x.shape[0], K)) + 1
    aux[:, 0] = 2.0 * factor  # increase the chance of "no error" to be factor-fold
    logits = tf.math.log(aux)
    missing = tf.random.categorical(logits, x.shape[1], dtype=tf.int32)
    # enable scaling of 0-valued columns
    x_transformed = tf.where(x == 0, x + 1e-10, x)
    # ideal intervals to avoid x = x * 1;
    # can run into problems as soon as K >= 20
    increment = 1 / (K / 2)
    aux = 1
    for i in range(K):
        if i == K - 1:  # the last category must be the MV (missing-value) position
            x_transformed = tf.where(missing == i, 3.0, x_transformed)
        else:
            step = (i + aux) * increment
            if 0.9 <= step <= 1.10:  # too close to the original value
                aux = aux + 1
                step = (i + aux) * increment
            x_transformed = tf.where(missing == i + 1, x_transformed * step, x_transformed)
    # transform back the 0-valued columns
    x_transformed = tf.where(x == 0, x_transformed - 1e-10, x_transformed)  # abs otherwise 0.5 - 1 = -0.5
    return x_transformed, missing
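
# Usage sketch (not part of the original module): a minimal, hedged example of
# how these transformations might be called. The feature vector, batch shape,
# and the shift/K/factor values below are illustrative assumptions, not values
# taken from the actual training pipeline.
if __name__ == "__main__":
    x = tf.constant([0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8])

    # Scale selected columns of one 8-feature row.
    print(apply_transformation_in_x(x, shift=1, shift2=0, K=4, K2=8, T='tabular_scaling'))

    # Mark the third feature as missing (-1 is the "NULL" marker used above).
    print(apply_transformation_in_x(x, shift=3, shift2=0, K=4, K2=8, T='missing_values'))

    # Inject random multiplicative errors into a small batch; `factor` boosts
    # the probability of the "no error" category (category 0).
    batch = tf.random.uniform((4, 8))
    x_err, which = include_errors_at_random(batch, K=4, factor=3.0)
    print(x_err, which)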