import sys
# The relative paths below assume this script is run from a directory
# that sits next to the LIMEaid repository checkout.
sys.path.insert(0, '../LIMEaid/LIMEaid/controller')
sys.path.insert(0, '../LIMEaid/LIMEaid/model')
sys.path.insert(0, '../LIMEaid/LIMEaid/view')
import LIMEaid as la
import LIMEdisplay as ld
import numpy as np
from sklearn import preprocessing
from sklearn import datasets
from sklearn import tree
from sklearn.naive_bayes import GaussianNB
#####################################################################
# LIME_Iris_ex.py
# This example uses the functions in the LIMEaid package to explain
# the classification of instances from a machine learning model. It
# uses either a Naive Bayes or a Decision Tree classifier.
#####################################################################
# Read Iris dataset
data_set = datasets.load_iris()
# Number of perturbed samples to be generated.
n = 10000
# Number of bins for the histograms of continuous attributes.
num_bins = 25
# Standardize the Iris attributes (zero mean, unit variance).
data_norm = preprocessing.scale(data_set.data)
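# Optional check (added here as an illustration, not in the original
# script): preprocessing.scale standardizes each column to zero mean
# and unit variance, which these assertions confirm.
assert np.allclose(data_norm.mean(axis=0), 0, atol=1e-8)
assert np.allclose(data_norm.std(axis=0), 1, atol=1e-8)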
# Choose one of the machine learning models below.
# To fit a decision tree instead of Naive Bayes, uncomment these lines:
# clf = tree.DecisionTreeClassifier()
# clf = clf.fit(data_norm, data_set.target)
# Fit a Gaussian Naive Bayes model to the Iris dataset.
clf = GaussianNB()
clf = clf.fit(data_norm, data_set.target)
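# Optional sanity check (added; not part of the original example):
# report training accuracy via sklearn's standard score() method.
print("Training accuracy:", clf.score(data_norm, data_set.target))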
# Now we generate the random samples.
# Note that because all four attributes are floats (continuous),
# we do not call the discrete random generator.
# Seed with a dummy row of zeros so np.vstack has a row to stack onto;
# the dummy row is dropped after the loop.
perturbed_samples = np.zeros(n)
# The for loop calls the random sample generator four times,
# once for each attribute in the Iris dataset.
for j in range(0, data_set.data.shape[1]):
    array = data_set.data[:, j]
    output = la.lime_sample(n, True, array, num_bins)
    perturbed_samples = np.vstack((perturbed_samples, output))
# Drop the dummy row and transpose to shape (n, number of attributes).
perturbed_samples = np.transpose(perturbed_samples[1:, ])
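# Sanity check (an added sketch, not part of the original script):
# there should be one perturbed value per Iris attribute per sample.
assert perturbed_samples.shape == (n, data_set.data.shape[1])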
# Label the samples using the model we fitted before.
class_perturb_samples = clf.predict(perturbed_samples)
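# Added illustration: np.unique with return_counts shows how the
# fitted model distributed the perturbed samples across the classes.
labels, counts = np.unique(class_perturb_samples, return_counts=True)
print("Perturbed-sample class counts:", dict(zip(labels, counts)))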
# From the original data we select an instance at random.
# This is the instance we will interpret using LIME.
# np.random.randint draws from [0, N), which keeps the index in bounds;
# rounding a uniform draw, as before, could yield N and index out of
# range.
inst_num = np.random.randint(0, data_set.data.shape[0])
# x is the instance we are selecting and x_class is its
# classification. If you don't want to use a randomly selected
# instance, use one of your choice.
x = data_norm[inst_num, :]
x_class = data_set.target[inst_num]
# Call LIME to fit a local, interpretable linear model around x.
lime_beta, lime_int, lime_weigh = la.lime_fit(x,
                                              x_class,
                                              perturbed_samples,
                                              class_perturb_samples)
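# From the variable names (an assumption, since the LIMEaid API is not
# documented here): lime_beta holds the local linear coefficients,
# lime_int the intercept, and lime_weigh the sample weights used in
# the locally weighted fit.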
# Print the selected instance and the LIME results.
print("Instance to be interpreted:")
for j in range(0, len(lime_beta)):
    print("Feature: ", data_set.feature_names[j], "\tvalue: ",
          data_set.data[inst_num, j], "\tnormalized value: ",
          data_norm[inst_num, j])
print("Classification: ",
      data_set.target_names[data_set.target[inst_num]],
      data_set.target[inst_num])
print("\nSignificant coefficients from LIME adjusted"
" linear model:")
for j in range(0, len(lime_beta)):
if(lime_beta[j] != 0):
print("Feature: ", data_set.feature_names[j],
"\tCoefficient: ", lime_beta[j])
print("Intercept: ", lime_int)
# Stack the samples and their labels so the display routine can plot
# them together.
full_data = np.column_stack((perturbed_samples, class_perturb_samples))
ld.lime_display(full_data, lime_beta, lime_int, x, x_class,
                data_set.feature_names, data_set.target_names)