Skip to content

Experiments

The experiments module contains the methods used to perform all the experiments outlined in the paper.

ablation_EIF_plus(I, dataset, eta_list, nruns=10)

Compute the average precision scores for different values of the eta parameter in the EIF+ model.

Parameters:

Name Type Description Default
I Type[ExtendedIsolationForest]

The AD model.

required
dataset Type[Dataset]

Input dataset.

required
eta_list list[float]

The list of eta values.

required
nruns int

The number of runs. Defaults to 10.

10

Returns:

Type Description
list[array]

The average precision scores.

Source code in utils_reboot/experiments.py
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
def ablation_EIF_plus(I:Type[ExtendedIsolationForest], 
                      dataset:Type[Dataset], 
                      eta_list:list[float], 
                      nruns:int=10) -> list[np.array]:

    """
    Compute the average precision scores for different values of the eta parameter in the EIF+ model.

    Args:
        I: The AD model.
        dataset: Input dataset.
        eta_list: The list of eta values.
        nruns: The number of runs. Defaults to 10.

    Returns:
        The average precision scores, one list of `nruns` values per eta.
    """

    all_scores = []
    for eta in tqdm(eta_list):
        # Repeat the fit/predict cycle nruns times for the current eta value
        run_scores = []
        for _ in range(nruns):
            I.eta = eta
            I.fit(dataset.X_train)
            anomaly_scores = I.predict(dataset.X_test)
            run_scores.append(average_precision_score(dataset.y_test, anomaly_scores))
        all_scores.append(run_scores)
    return all_scores

compute_global_importances(I, dataset, p=0.1, interpretation='EXIFFI+', fit_model=True)

Compute the global feature importances for an interpretation model on a specific dataset.

Parameters:

Name Type Description Default
I Type[ExtendedIsolationForest]

The AD model.

required
dataset Type[Dataset]

Input dataset.

required
p

The percentage of outliers in the dataset (i.e. contamination factor). Defaults to 0.1.

0.1
interpretation

Name of the interpretation method to be used. Defaults to "EXIFFI+".

'EXIFFI+'
fit_model

Whether to fit the model on the dataset. Defaults to True.

True

Returns:

Type Description
array

The global feature importance vector.

Source code in utils_reboot/experiments.py
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
def compute_global_importances(I: Type[ExtendedIsolationForest],
                        dataset: Type[Dataset],
                        p: float = 0.1,
                        interpretation: str = "EXIFFI+",
                        fit_model: bool = True) -> np.array: 

    """
    Compute the global feature importances for an interpretation model on a specific dataset.

    Args:
        I: The AD model.
        dataset: Input dataset.
        p: The percentage of outliers in the dataset (i.e. contamination factor). Defaults to 0.1.
        interpretation: Name of the interpretation method to be used. Defaults to "EXIFFI+".
        fit_model: Whether to fit the model on the dataset. Defaults to True.

    Returns:
        The global feature importance vector.

    Raises:
        ValueError: If `interpretation` is not one of "DIFFI", "EXIFFI", "EXIFFI+" or "RandomForest".
    """

    if fit_model:
        I.fit(dataset.X_train)
    if interpretation == "DIFFI":
        fi, _ = diffi_ib(I, dataset.X_test)
    elif interpretation in ("EXIFFI", "EXIFFI+"):
        fi = I.global_importances(dataset.X_test, p)
    elif interpretation == "RandomForest":
        # Surrogate approach: fit a random forest on the AD scores and
        # reuse its built-in feature importances
        rf = RandomForestRegressor()
        rf.fit(dataset.X_test, I.predict(dataset.X_test))
        fi = rf.feature_importances_
    else:
        # Previously an unknown method fell through to `return fi` with
        # `fi` unbound, raising an opaque NameError
        raise ValueError(f"Unknown interpretation method: {interpretation}")
    return fi

compute_plt_data(imp_path)

Compute statistics on the global feature importances obtained from experiment_global_importances. These will then be used in the score_plot method.

Parameters:

Name Type Description Default
imp_path str

The path to the importances file.

required

Returns:

Type Description
dict

The dictionary containing the mean importances, the feature order, and the standard deviation of the importances.

Source code in utils_reboot/experiments.py
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
def compute_plt_data(imp_path: str) -> dict:

    """
    Compute statistics on the global feature importances obtained from experiment_global_importances.
    These will then be used in the score_plot method.

    Args:
        imp_path: The path to the importances file (an `.npz` archive with key 'element').

    Returns:
        The dictionary containing the mean importances ('Importances'), the feature
        order ('feat_order', features sorted by increasing mean importance), and the
        standard deviation of the importances ('std').

    Raises:
        ValueError: If the importances file cannot be loaded as an npz archive.
    """

    try:
        fi = np.load(imp_path)['element']
    except Exception as e:
        # The original bare `except:` only printed a message and then crashed with
        # a NameError on `fi`; raise a clear error instead, preserving the cause.
        raise ValueError("Error: importances file should be npz") from e

    # fi may contain np.nan entries (e.g. failed runs): use nan-aware statistics
    if np.isnan(fi).any():
        mean_imp = np.nanmean(fi, axis=0)
        std_imp = np.nanstd(fi, axis=0)
    else:
        mean_imp = np.mean(fi, axis=0)
        std_imp = np.std(fi, axis=0)

    # Order the features by increasing mean importance (layout used by score_plot)
    feat_ordered = mean_imp.argsort()

    plt_data = {'Importances': mean_imp[feat_ordered],
                'feat_order': feat_ordered,
                'std': std_imp[feat_ordered]}
    return plt_data

contamination_in_training_precision_evaluation(I, dataset, n_runs=10, train_size=0.8, contamination_values=np.linspace(0.0, 0.1, 10), compute_GFI=False, interpretation='EXIFFI+', pre_process=True)

Evaluate the average precision of the model on the dataset for different contamination values in the training set. The precision values will then be used in the plot_precision_over_contamination method

Parameters:

Name Type Description Default
I Type[ExtendedIsolationForest]

The AD model.

required
dataset Type[Dataset]

Input dataset.

required
n_runs int

The number of runs. Defaults to 10.

10
train_size

The size of the training set. Defaults to 0.8.

0.8
contamination_values NDArray

The contamination values. Defaults to np.linspace(0.0,0.1,10).

linspace(0.0, 0.1, 10)
compute_GFI bool

Whether to compute the global feature importances. Defaults to False.

False
interpretation str

Name of the interpretation method to be used. Defaults to "EXIFFI+".

'EXIFFI+'
pre_process bool

Whether to pre process the dataset. Defaults to True.

True

Returns:

Type Description
Union[tuple[ndarray, ndarray], ndarray]

The average precision scores and the global feature importances if compute_GFI is True,

Union[tuple[ndarray, ndarray], ndarray]

otherwise just the average precision scores are returned.

Source code in utils_reboot/experiments.py
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
def contamination_in_training_precision_evaluation(I: Type[ExtendedIsolationForest],
                                                   dataset: Type[Dataset],
                                                   n_runs: int = 10,
                                                   train_size = 0.8,
                                                   contamination_values: npt.NDArray = np.linspace(0.0,0.1,10),
                                                   compute_GFI:bool=False,
                                                   interpretation:str="EXIFFI+",
                                                   pre_process:bool=True,
                                                   ) -> Union[tuple[np.ndarray, np.ndarray], np.ndarray]:

    """
    Evaluate the average precision of the model on the dataset for different contamination values in the training set. 
    The precision values will then be used in the `plot_precision_over_contamination` method

    Args:
        I: The AD model.
        dataset: Input dataset.
        n_runs: The number of runs. Defaults to 10.
        train_size: The size of the training set. Defaults to 0.8.
        contamination_values: The contamination values. Defaults to `np.linspace(0.0,0.1,10)`.
        compute_GFI: Whether to compute the global feature importances. Defaults to False.
        interpretation: Name of the interpretation method to be used. Defaults to "EXIFFI+".
        pre_process: Whether to pre process the dataset. Defaults to True.

    Returns:
        The average precision scores and the global feature importances if `compute_GFI` is True, 
        otherwise just the average precision scores are returned. 
    """

    # precisions[i, j] = average precision at training contamination i, run j
    precisions = np.zeros(shape=(len(contamination_values),n_runs))
    if compute_GFI:
        # 4D: (training contamination i, run j, contamination value k passed as p, feature)
        importances = np.zeros(shape=(len(contamination_values),n_runs,len(contamination_values),dataset.X.shape[1]))
    for i,contamination in tqdm(enumerate(contamination_values)):
        for j in range(n_runs):
            # Re-split the dataset at every run so each run sees a fresh split
            dataset.split_dataset(train_size,contamination)
            dataset.initialize_test()

            if pre_process:
                dataset.pre_process()

            start_time = time.time()
            I.fit(dataset.X_train)
            fit_time = time.time() - start_time

            # Record fit time only after the first 4 warm-up runs.
            # NOTE(review): dict_time and filename appear to be module-level globals
            # defined elsewhere in this file — confirm.
            if j>3:
                try:
                    dict_time["fit"][I.name].setdefault(dataset.name, []).append(fit_time)
                except:
                    # Model name not yet present in dict_time: create its entry
                    print('Model not recognized: creating a new key in the dict_time for the new model')
                    dict_time["fit"].setdefault(I.name, {}).setdefault(dataset.name, []).append(fit_time)

            if compute_GFI:
                # Compute the importances once per contamination value `c`
                # (used as the contamination parameter p), without re-fitting
                for k,c in enumerate(contamination_values):
                    start_time = time.time()
                    importances[i,j,k,:] = compute_global_importances(I,
                                                                    dataset,
                                                                    p=c,
                                                                    interpretation=interpretation,
                                                                    fit_model=False)
                    gfi_time = time.time() - start_time
                    if k>3: 
                        dict_time["importances"][interpretation].setdefault(dataset.name, []).append(gfi_time)

            start_time = time.time()
            score = I.predict(dataset.X_test)
            predict_time = time.time() - start_time
            # Same warm-up rule as for the fit time
            if j>3:
                try:
                    dict_time["predict"][I.name].setdefault(dataset.name, []).append(predict_time)
                except:
                    print('Model not recognized: creating a new key in the dict_time for the new model')
                    dict_time["predict"].setdefault(I.name, {}).setdefault(dataset.name, []).append(predict_time)

            avg_prec = sklearn.metrics.average_precision_score(dataset.y_test,score)
            precisions[i,j] = avg_prec

    # Persist the updated timing dictionary
    with open(filename, "wb") as file:
        pickle.dump(dict_time, file)
    if compute_GFI:
        return precisions,importances
    return precisions

experiment_global_importances(I, dataset, n_runs=10, p=0.1, model='EIF+', interpretation='EXIFFI+')

Compute the global feature importances for an interpretation model on a specific dataset for a number of runs.

Parameters:

Name Type Description Default
I Type[ExtendedIsolationForest]

The AD model.

required
dataset Type[Dataset]

Input dataset.

required
n_runs int

The number of runs. Defaults to 10.

10
p float

The percentage of outliers in the dataset (i.e. contamination factor). Defaults to 0.1.

0.1
model str

The name of the model. Defaults to 'EIF+'.

'EIF+'
interpretation str

Name of the interpretation method to be used. Defaults to "EXIFFI+".

'EXIFFI+'

Returns:

Type Description
tuple[array, dict, str, str]

The global feature importances vectors for the different runs and the average importances times.

Source code in utils_reboot/experiments.py
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
def experiment_global_importances(I:Type[ExtendedIsolationForest],
                               dataset:Type[Dataset],
                               n_runs:int=10, 
                               p:float=0.1,
                               model:str="EIF+",
                               interpretation:str="EXIFFI+"
                               ) -> tuple[np.ndarray,float]:

    """
    Compute the global feature importances for an interpretation model on a specific dataset for a number of runs.

    Args:
        I: The AD model.
        dataset: Input dataset.
        n_runs: The number of runs. Defaults to 10.
        p: The percentage of outliers in the dataset (i.e. contamination factor). Defaults to 0.1.
        model: The name of the model. Defaults to 'EIF+'.
        interpretation: Name of the interpretation method to be used. Defaults to "EXIFFI+".

    Returns:
        The global feature importances vectors for the different runs (shape
        `(n_runs, n_features)`) and the average importances time.
    """
    # The return annotation previously claimed tuple[np.array,dict,str,str],
    # but the function returns an array and a float (mean time).
    fi = np.zeros(shape=(n_runs, dataset.X.shape[1]))
    imp_times = []
    for i in tqdm(range(n_runs)):
        start_time = time.time()
        fi[i,:] = compute_global_importances(I,
                        dataset,
                        p = p,
                        interpretation=interpretation)
        gfi_time = time.time() - start_time
        # Skip the first 4 runs as warm-up before recording timings.
        # NOTE(review): dict_time and filename appear to be module-level globals
        # defined elsewhere in this file — confirm.
        if i > 3:
            imp_times.append(gfi_time)
            # The IF + EXIFFI combination is tracked under its own key
            if (model == "IF") and (interpretation == "EXIFFI"):
                dict_time["importances"]["IF_EXIFFI"].setdefault(dataset.name, []).append(gfi_time)
            else:
                dict_time["importances"][interpretation].setdefault(dataset.name, []).append(gfi_time)

    # Persist the updated timing dictionary
    with open(filename, "wb") as file:
        pickle.dump(dict_time, file)
    return fi, np.mean(imp_times)

feature_selection(I, dataset, importances_indexes, n_runs=10, inverse=True, random=False, scenario=2)

Perform feature selection on the dataset by dropping features in order of importance.

Parameters:

Name Type Description Default
I Type[ExtendedIsolationForest]

The AD model.

required
dataset Type[Dataset]

Input dataset.

required
importances_indexes NDArray

The indexes of the features in the dataset.

required
n_runs int

The number of runs. Defaults to 10.

10
inverse bool

Whether to drop the features in decreasing order of importance. Defaults to True.

True
random bool

Whether to drop the features in random order. Defaults to False.

False
scenario int

The scenario of the experiment. Defaults to 2.

2

Returns:

Type Description
array

The average precision scores for the different runs.

Source code in utils_reboot/experiments.py
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
def feature_selection(I: Type[ExtendedIsolationForest],
                      dataset: Type[Dataset],
                      importances_indexes: npt.NDArray,
                      n_runs: int = 10, 
                      inverse: bool = True,
                      random: bool = False,
                      scenario:int=2
                      ) -> np.array:

        """
        Perform feature selection on the dataset by dropping features in order of importance.

        Args:
            I: The AD model.
            dataset: Input dataset.
            importances_indexes: The indexes of the features in the dataset.
            n_runs: The number of runs. Defaults to 10.
            inverse: Whether to drop the features in decreasing order of importance. Defaults to True.
            random: Whether to drop the features in random order. Defaults to False.
            scenario: The scenario of the experiment. Defaults to 2.

        Returns:
            The average precision scores for the different runs.
        """

        # Work on a deep copy so the shrinking steps never mutate the caller's dataset
        dataset_shrinking = copy.deepcopy(dataset)
        d = dataset.X.shape[1]
        # precisions[k, r] = average precision with k features dropped, run r
        precisions = np.zeros(shape=(len(importances_indexes),n_runs))
        for number_of_features_dropped in tqdm(range(len(importances_indexes))):
            runs = np.zeros(n_runs)
            for run in range(n_runs):
                if random:
                    # Reshuffle the feature order at every run when dropping at random
                    # (note: this rebinds the importances_indexes parameter in place)
                    importances_indexes = np.random.choice(importances_indexes, len(importances_indexes), replace=False)
                # Keep the first d-k features, or drop the first k when inverse=True
                dataset_shrinking.X = dataset.X_test[:,importances_indexes[:d-number_of_features_dropped]] if not inverse else dataset.X_test[:,importances_indexes[number_of_features_dropped:]]
                dataset_shrinking.y = dataset.y
                dataset_shrinking.drop_duplicates()

                if scenario==2:
                    # Scenario 2: split with 0 contamination in the training portion
                    dataset_shrinking.split_dataset(1-dataset_shrinking.perc_outliers,0)
                    dataset_shrinking.initialize_test()
                else:
                    dataset_shrinking.initialize_train()
                    dataset_shrinking.initialize_test()

                try:
                    # Timings are recorded only while the feature count still matches the
                    # original dataset (i.e. no feature effectively dropped yet).
                    # NOTE(review): dict_time and filename appear to be module-level
                    # globals defined elsewhere in this file — confirm.
                    if dataset.X.shape[1] == dataset_shrinking.X.shape[1]:

                        start_time = time.time()
                        I.fit(dataset_shrinking.X_train)
                        fit_time = time.time() - start_time

                        # Skip the first 4 runs as warm-up before recording timings
                        if run >3:
                            dict_time["fit"][I.name].setdefault(dataset.name, []).append(fit_time)
                        start_time = time.time()
                        score = I.predict(dataset_shrinking.X_test)
                        predict_time = time.time() - start_time

                        if run >3:                        
                            dict_time["predict"][I.name].setdefault(dataset.name, []).append(predict_time)
                    else:
                        I.fit(dataset_shrinking.X_train)
                        score = I.predict(dataset_shrinking.X_test)
                    avg_prec = sklearn.metrics.average_precision_score(dataset_shrinking.y,score)
                    runs[run] = avg_prec
                except:
                    # Any failure in fit/predict/scoring is recorded as NaN for this run
                    runs[run] = np.nan

            precisions[number_of_features_dropped] = runs

        # Persist the updated timing dictionary
        with open(filename, "wb") as file:
            pickle.dump(dict_time, file)
        return precisions

fit_predict_experiment(I, dataset, n_runs=40, model='EIF+')

Fit and predict the model on the dataset for a number of runs and keep track of the fit and predict times.

Parameters:

Name Type Description Default
I Type[ExtendedIsolationForest]

The AD model.

required
dataset Type[Dataset]

Input dataset.

required
n_runs int

The number of runs. Defaults to 40.

40
model

The name of the model. Defaults to 'EIF+'.

'EIF+'

Returns:

Type Description
tuple[float, float]

The average fit and predict time.

Source code in utils_reboot/experiments.py
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
def fit_predict_experiment(I: Type[ExtendedIsolationForest],
                            dataset: Type[Dataset],
                            n_runs:int = 40,
                            model='EIF+') -> tuple[float,float]:

    """
    Fit and predict the model on the dataset for a number of runs and keep track of the fit and predict times.

    Args:
        I: The AD model.
        dataset: Input dataset.
        n_runs: The number of runs. Defaults to 40.
        model: The name of the model. Defaults to 'EIF+'.

    Returns:
        The average fit and predict time.
    """

    fit_durations = []
    predict_durations = []

    for run_idx in trange(n_runs):
        # Time the fit step
        t0 = time.time()
        I.fit(dataset.X_train)
        elapsed_fit = time.time() - t0
        # The first 4 runs are treated as warm-up and are not recorded
        if run_idx > 3:
            fit_durations.append(elapsed_fit)
            dict_time["fit"][I.name].setdefault(dataset.name, []).append(elapsed_fit)

        # Time the predict step; the scoring API differs between model families
        t0 = time.time()
        if model in ['EIF','EIF+']:
            _ = I._predict(dataset.X_test, p=dataset.perc_outliers)
            elapsed_predict = time.time() - t0
        elif model in ['sklearn_IF','DIF','AnomalyAutoencoder']:
            _ = I.predict(dataset.X_test)
            elapsed_predict = time.time() - t0

        if run_idx > 3:
            predict_durations.append(elapsed_predict)
            dict_time["predict"][I.name].setdefault(dataset.name, []).append(elapsed_predict)

    # Persist the timing dictionary (dict_time and filename are module-level names
    # defined elsewhere in this file)
    with open(filename, "wb") as file:
        pickle.dump(dict_time, file)

    return np.mean(fit_durations), np.mean(predict_durations)

performance(y_pred, y_true, score, I, model_name, dataset, contamination=0.1, train_size=0.8, scenario=2, n_runs=10, filename='', path=os.getcwd(), save=True)

Compute the performance metrics of the model on the dataset.

Parameters:

Name Type Description Default
y_pred array

The predicted labels.

required
y_true array

The true labels.

required
score array

The Anomaly Scores.

required
I Type[ExtendedIsolationForest]

The AD model.

required
model_name str

The name of the model.

required
dataset Type[Dataset]

Input dataset.

required
contamination float

The contamination factor. Defaults to 0.1.

0.1
train_size float

The size of the training set. Defaults to 0.8.

0.8
scenario int

The scenario of the experiment. Defaults to 2.

2
n_runs int

The number of runs. Defaults to 10.

10
filename str

The filename. Defaults to "".

''
path str

The path to the experiments folder. Defaults to os.getcwd().

getcwd()
save bool

Whether to save the results. Defaults to True.

True

Returns:

Type Description
tuple[DataFrame, str]

The performance metrics and the path to the results.

Source code in utils_reboot/experiments.py
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
def performance(y_pred:np.array,
                y_true:np.array,
                score:np.array,
                I:Type[ExtendedIsolationForest],
                model_name:str,
                dataset:Type[Dataset],
                contamination:float=0.1,
                train_size:float=0.8,
                scenario:int=2,
                n_runs:int=10,
                filename:str="",
                path:str=os.getcwd(),
                save:bool=True
                ) -> tuple[pd.DataFrame,str]: 

    """
    Compute the performance metrics of the model on the dataset.

    Args:
        y_pred: The predicted labels.
        y_true: The true labels.
        score: The Anomaly Scores.
        I: The AD model.
        model_name: The name of the model.
        dataset: Input dataset.
        contamination: The contamination factor. Defaults to 0.1.
        train_size: The size of the training set. Defaults to 0.8.
        scenario: The scenario of the experiment. Defaults to 2.
        n_runs: The number of runs. Defaults to 10.
        filename: The filename used when saving the results; if empty a default
            name `perf_<dataset>_<model>_<scenario>` is generated. Defaults to "".
        path: The path to the experiments folder. Defaults to os.getcwd().
        save: Whether to save the results. Defaults to True.

    Returns:
        The performance metrics and the path to the results.
    """

    y_pred=y_pred.astype(int)
    y_true=y_true.astype(int)

    # Cap the dataset size to keep the repeated fits tractable
    if dataset.X.shape[0]>7500:
        dataset.downsample(max_samples=7500)

    # Average precision is averaged over n_runs fresh fit/predict cycles;
    # the `score` argument is recomputed on each run.
    precisions=[]
    for i in trange(n_runs):
        I.fit(dataset.X_train)
        # DIF/AnomalyAutoencoder expose scores via decision_function, the others via predict
        if model_name in ['DIF','AnomalyAutoencoder']:
            score = I.decision_function(dataset.X_test)
        else:
            score = I.predict(dataset.X_test)
        precisions.append(average_precision_score(y_true, score))

    # NOTE(review): ROC AUC is computed on the hard predictions (y_pred), not on
    # the anomaly scores — confirm this is intended.
    df=pd.DataFrame({
        "Model": model_name,
        "Dataset": dataset.name,
        "Contamination": contamination,
        "Train Size": train_size,
        "Precision": precision_score(y_true, y_pred),
        "Recall": recall_score(y_true, y_pred),
        "f1 score": f1_score(y_true, y_pred),
        "Accuracy": accuracy_score(y_true, y_pred),
        "Balanced Accuracy": balanced_accuracy_score(y_true, y_pred),
        "Average Precision": np.mean(precisions),
        "ROC AUC Score": roc_auc_score(y_true, y_pred)
    }, index=[pd.Timestamp.now()])

    path=path + f"/experiments/results/{dataset.name}/experiments/metrics/{model_name}/" + f"scenario_{str(scenario)}/"

    if not os.path.exists(path):
        os.makedirs(path)

    # Honor a caller-supplied filename; previously the argument was always
    # overwritten, making the parameter dead. An empty filename (the default)
    # keeps the original behavior.
    if not filename:
        filename=f"perf_{dataset.name}_{model_name}_{scenario}"

    if save:
        save_element(df, path, filename)

    return df,path