Skip to content

Utils

This section contains the documentation for the utils module. This module collects a series of utility functions that are used mainly for the experiments.

AutoEncoder

Bases: AutoEncoder

Wrapper of pyod.models.auto_encoder.AutoEncoder

Source code in utils_reboot/utils.py
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
class AutoEncoder(oldAutoEncoder):

    """
    Thin wrapper of `pyod.models.auto_encoder.AutoEncoder` whose `predict` returns Anomaly Scores.
    """

    def __init__(self, **kwargs):

        """
        Build the detector by forwarding every keyword argument to the parent class constructor.

        Args:
            **kwargs: Keyword arguments passed unchanged to the parent constructor.

        Attributes:
            name (str): Label stored on the instance, always `"AnomalyAutoencoder"`.
        """

        super().__init__(**kwargs)
        self.name = "AnomalyAutoencoder"

    def predict(self, X:np.array) -> np.array:

        """
        Override the parent `predict` so that it returns the output of `decision_function`
        (the Anomaly Scores) instead of the class labels (i.e. inliers and outliers)

        Args:
            X: Input dataset

        Returns:
            Anomaly Scores
        """

        return self.decision_function(X)

    def _predict(self,
                 X:np.array,
                 p:float)-> np.array:

        """
        Derive the class labels from the Anomaly Scores and the contamination factor `p`.

        The threshold is the score sitting at position `int(p * n)` of the scores sorted in
        descending order; a sample is flagged only when its score is strictly above it.

        Args:
            X: Input dataset
            p: Contamination factor

        Returns:
            Result of `scores > threshold` (truthy for outliers, falsy for inliers)
        """

        scores = self.predict(X)
        ranked = sorted(scores, reverse=True)
        threshold = ranked[int(p * len(scores))]
        return scores > threshold

__init__(**kwargs)

Constructor of the class AutoEncoder which uses the constructor of the parent class AutoEncoder from pyod.models.auto_encoder module.

Attributes:

Name Type Description
name str

Add the name attribute to the class.

Source code in utils_reboot/utils.py
115
116
117
118
119
120
121
122
123
124
125
def __init__(self, **kwargs):

    """
    Constructor of the class `AutoEncoder` which uses the constructor of the parent class `AutoEncoder` from `pyod.models.auto_encoder` module.

    Args:
        **kwargs: Keyword arguments forwarded unchanged to the parent class constructor.

    Attributes:
        name (str): Set to `"AnomalyAutoencoder"` once the parent constructor has run.
    """

    super().__init__(**kwargs)
    self.name = "AnomalyAutoencoder"

predict(X)

Override the predict method of the parent class AutoEncoder from the pyod.models.auto_encoder module to obtain the Anomaly Scores instead of the class labels (i.e. inliers and outliers)

Parameters:

Name Type Description Default
X array

Input dataset

required

Returns:

Type Description
array

Anomaly Scores

Source code in utils_reboot/utils.py
127
128
129
130
131
132
133
134
135
136
137
138
139
140
def predict(self, X:np.array) -> np.array:

    """
    Override the `predict` method of the parent class `AutoEncoder` from `pyod.models.auto_encoder`
    so that it hands back the output of `decision_function` (the Anomaly Scores) rather than
    the class labels (i.e. inliers and outliers)

    Args:
        X: Input dataset

    Returns:
        Anomaly Scores
    """
    return self.decision_function(X)

DIF

Bases: DIF

Wrapper of pyod.models.dif.DIF

Source code in utils_reboot/utils.py
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
class DIF(oldDIF):

    """
    Thin wrapper of `pyod.models.dif.DIF` whose `predict` returns Anomaly Scores.
    """

    def __init__(self, **kwargs):

        """
        Build the detector by forwarding every keyword argument to the parent class constructor.

        Args:
            **kwargs: Keyword arguments passed unchanged to the parent constructor.

        Attributes:
            name (str): Label stored on the instance, always `"DIF"`.
        """
        super().__init__(**kwargs)
        self.name = "DIF"

    def predict(self, X:np.array) -> np.array:

        """
        Override the parent `predict` so that it returns the output of `decision_function`
        (the Anomaly Scores) instead of the class labels (i.e. inliers and outliers)

        Args:
            X: Input dataset

        Returns:
            Anomaly Scores

        """

        return self.decision_function(X)

    def _predict(self,
                 X:np.array,
                 p:float)->np.array:

        """
        Derive the class labels from the Anomaly Scores and the contamination factor `p`.

        The threshold is the score sitting at position `int(p * n)` of the scores sorted in
        descending order; a sample is flagged only when its score is strictly above it.

        Args:
            X: Input dataset
            p: Contamination factor

        Returns:
            Result of `scores > threshold` (truthy for outliers, falsy for inliers)
        """

        scores = self.predict(X)
        ranked = sorted(scores, reverse=True)
        threshold = ranked[int(p * len(scores))]
        return scores > threshold

__init__(**kwargs)

Constructor of the class DIF which uses the constructor of the parent class DIF from pyod.models.dif module.

Attributes:

Name Type Description
name str

Add the name attribute to the class.

Source code in utils_reboot/utils.py
61
62
63
64
65
66
67
68
69
70
def __init__(self, **kwargs):

    """
    Constructor of the class `DIF` which uses the constructor of the parent class `DIF` from `pyod.models.dif` module.

    Args:
        **kwargs: Keyword arguments forwarded unchanged to the parent class constructor.

    Attributes:
        name (str): Set to `"DIF"` once the parent constructor has run.
    """
    super().__init__(**kwargs)
    self.name = "DIF"

predict(X)

Override the predict method of the parent class DIF from the pyod.models.dif module to obtain the Anomaly Scores instead of the class labels (i.e. inliers and outliers)

Parameters:

Name Type Description Default
X array

Input dataset

required

Returns:

Type Description
array

Anomaly Scores

Source code in utils_reboot/utils.py
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
def predict(self, X:np.array) -> np.array:

    """
    Override the `predict` method of the parent class `DIF` from `pyod.models.dif`
    so that it hands back the output of `decision_function` (the Anomaly Scores) rather than
    the class labels (i.e. inliers and outliers)

    Args:
        X: Input dataset

    Returns:
        Anomaly Scores

    """

    return self.decision_function(X)

sklearn_IsolationForest

Bases: IsolationForest

Wrapper of sklearn.ensemble.IsolationForest

Source code in utils_reboot/utils.py
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
class sklearn_IsolationForest(IsolationForest):

    """
    Thin wrapper of `sklearn.ensemble.IsolationForest` whose `predict` returns Anomaly Scores.
    """

    def __init__(self, **kwargs):

        """
        Build the detector by forwarding every keyword argument to the parent class constructor.

        Args:
            **kwargs: Keyword arguments passed unchanged to the parent constructor.

        Attributes:
            name (str): Label stored on the instance, always `"sklearn_IF"`.
        """
        super().__init__(**kwargs)
        self.name = "sklearn_IF"

    def predict(self, X:np.array) -> np.array:

        """
        Override the parent `predict` so that it returns Anomaly Scores instead of the
        class labels (i.e. inliers and outliers).

        The scores are the output of `decision_function` with the sign flipped and shifted by 0.5.

        Args:
            X: Input dataset

        Returns:
            Anomaly Scores
        """

        return 0.5 - self.decision_function(X)

__init__(**kwargs)

Constructor of the class sklearn_IsolationForest which uses the constructor of the parent class IsolationForest from sklearn.ensemble module.

Attributes:

Name Type Description
name str

Add the name attribute to the class.

Source code in utils_reboot/utils.py
28
29
30
31
32
33
34
35
36
37
def __init__(self, **kwargs):

    """
    Constructor of the class `sklearn_IsolationForest` which uses the constructor of the parent class `IsolationForest` from `sklearn.ensemble` module.

    Args:
        **kwargs: Keyword arguments forwarded unchanged to the parent class constructor.

    Attributes:
        name (str): Set to `"sklearn_IF"` once the parent constructor has run.
    """
    super().__init__(**kwargs)
    self.name = "sklearn_IF"

predict(X)

Override the predict method of the parent class IsolationForest from the sklearn.ensemble module to obtain the Anomaly Scores instead of the class labels (i.e. inliers and outliers)

Parameters:

Name Type Description Default
X array

Input dataset

required

Returns:

Type Description
array

Anomaly Scores

Source code in utils_reboot/utils.py
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
def predict(self, X:np.array) -> np.array:

    """
    Override the `predict` method of the parent class `IsolationForest` from `sklearn.ensemble`
    so that it hands back Anomaly Scores rather than the class labels (i.e. inliers and outliers).

    The scores are the output of `decision_function` with the sign flipped and shifted by 0.5.

    Args:
        X: Input dataset

    Returns:
        Anomaly Scores
    """

    return 0.5 - self.decision_function(X)

get_feature_indexes(dataset, f1, f2)

Function to get the indexes of two features in the dataset given the feature names.

Parameters:

Name Type Description Default
dataset Type[Dataset]

Dataset

required
f1 Union[str, int]

Name of the first feature

required
f2 Union[str, int]

Name of the second feature

required

Returns:

Type Description
tuple[int, int]

Indexes of the two features in the dataset

Source code in utils_reboot/utils.py
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
def get_feature_indexes(dataset:Type[Dataset],
                        f1:Union[str, int],
                        f2:Union[str, int]) -> tuple[int,int]:

    """
    Function to get the indexes of two features in the dataset given the feature names.

    When both features are already integers they are returned untouched. Otherwise each
    feature is looked up in `dataset.feature_names`; a feature that is not a known name
    is accepted as-is only if it is an integer (so a name and an index can be mixed).

    Args:
        dataset: Dataset exposing a `feature_names` sequence
        f1: Name (or index) of the first feature
        f2: Name (or index) of the second feature

    Returns:
        Indexes of the two features in the dataset

    Raises:
        ValueError: If a feature given by name is not present in `dataset.feature_names`.
    """

    if isinstance(f1,int) and isinstance(f2,int):
        return f1,f2

    # list() so that any sequence of names (not only a list) supports `.index`
    feature_names=list(dataset.feature_names)

    indexes=[]
    for feature in (f1,f2):
        try:
            indexes.append(feature_names.index(feature))
        except ValueError:
            # Fail loudly instead of printing and crashing later on an unbound variable
            if not isinstance(feature,int):
                raise ValueError(f'Feature name not valid: {feature!r}') from None
            indexes.append(feature)

    return indexes[0],indexes[1]

get_most_recent_file(directory_path, filetype='pickle')

Function to get the most recent file (i.e. last modified file) in a directory path.

Parameters:

Name Type Description Default
directory_path str

Directory path where the files are stored

required
filetype str

Type of the file (i.e. npz or pickle)

'pickle'

Returns:

Type Description
str

Path to the most recent file in the directory path

Source code in utils_reboot/utils.py
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
def get_most_recent_file(directory_path:str,
                         filetype:str="pickle")->str:

    """
    Function to get the most recent file in a directory path, judged by the
    `%d-%m-%Y_%H-%M-%S` timestamp that prefixes the file name.

    Only files whose extension matches `filetype` and whose first 19 characters parse as
    a timestamp are considered; any other entry in the directory is ignored.

    Args:
        directory_path: Directory path where the files are stored
        filetype: Type of the file (i.e. `npz` or `pickle`)

    Returns:
        Path to the most recent file in the directory path

    Raises:
        FileNotFoundError: If the directory contains no timestamped file of the requested type.

    """

    assert filetype in ["pickle", "npz"], "filetype must be either 'pickle' or 'npz'"
    date_format = "%d-%m-%Y_%H-%M-%S"
    extension = f'.{filetype}'

    candidates=[]
    for file in os.listdir(directory_path):
        if not file.endswith(extension):
            continue
        try:
            timestamp=datetime.strptime(file[:19],date_format)
        except ValueError:
            # Not one of the timestamped files, skip it
            continue
        candidates.append((timestamp,file))

    if not candidates:
        raise FileNotFoundError(f"No timestamped '{extension}' file found in {directory_path}")

    # Keep the real file name so files saved with a custom name suffix are found too
    _,most_recent_file=max(candidates)
    return os.path.join(directory_path,most_recent_file)

open_element(file_path, filetype='pickle')

Function to open an element from a file (i.e. npz or pickle file) in the specified directory path.

Parameters:

Name Type Description Default
file_path str

Path to the file

required
filetype str

Type of the file (i.e. npz or pickle)

'pickle'

Returns:

Type Description
Union[array, list, DataFrame, Type[Precisions], Type[NewPrecisions], Type[Precisions_random]]

Element stored in the file

Source code in utils_reboot/utils.py
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
def open_element(file_path:str,
                 filetype:str="pickle") -> Union[np.array,list,pd.DataFrame,Type[Precisions],Type[NewPrecisions],Type[Precisions_random]]:

    """
    Function to open an element from a file (i.e. `npz` or `pickle` file) in the specified directory path.

    For `npz` files the array stored under the key `element` is returned and the archive
    is closed before returning.

    Args:
        file_path: Path to the file
        filetype: Type of the file (i.e. `npz` or `pickle`)

    Returns:
        Element stored in the file
    """

    assert filetype in ["pickle", "npz"], "filetype must be either 'pickle' or 'npz'"
    if filetype == "pickle":
        # NOTE: unpickling can execute arbitrary code, only open files from trusted sources
        with open(file_path, 'rb') as fl:
            element = pickle.load(fl)
    elif filetype == "npz":
        # Context manager closes the archive so its file handle is not leaked
        with np.load(file_path) as archive:
            element = archive['element']
    return element

save_element(element, directory_path, filename='', filetype='pickle')

Function to save an element produced by an experiment in a file (i.e. npz or pickle file) in the specified directory path.

Parameters:

Name Type Description Default
element Union[array, list, DataFrame, Type[Precisions], Type[NewPrecisions], Type[Precisions_random]]

Element to be saved

required
directory_path str

Directory path where the file will be saved

required
filename str

Name of the file

''
filetype str

Type of the file (i.e. npz or pickle)

'pickle'

Returns:

Type Description
None

The method saves element and does not return any value

Source code in utils_reboot/utils.py
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
def save_element(element:Union[np.array,list,pd.DataFrame,Type[Precisions],Type[NewPrecisions],Type[Precisions_random]],
                 directory_path:str,
                 filename:str="",
                 filetype:str="pickle") -> None:

    """
    Store an element produced by an experiment in a `npz` or `pickle` file inside the given directory.

    The file name is the current local time formatted as `%d-%m-%Y_%H-%M-%S`, followed by
    an underscore and `filename`.

    Args:
        element: Element to be saved
        directory_path: Directory path where the file will be saved
        filename: Name of the file (appended after the timestamp)
        filetype: Type of the file (i.e. `npz` or `pickle`)

    Returns:
        The method saves element and does not return any value

    """

    assert filetype in ["pickle", "npz"], "filetype must be either 'pickle' or 'npz'"
    timestamp = time.strftime("%d-%m-%Y_%H-%M-%S", time.localtime())
    stamped_name = timestamp + '_' + filename
    target = '/'.join([directory_path, stamped_name])
    if filetype == "npz":
        # np.savez adds the `.npz` extension itself
        np.savez(target, element=element)
    elif filetype == "pickle":
        with open(target+".pickle", 'wb') as fl:
            pickle.dump(element, fl)

save_fs_prec(precs, path)

Function to save the feature selection precisions in a file (i.e. pickle file) in the specified directory path.

Parameters:

Name Type Description Default
precs namedtuple

Feature selection precisions

required
path str

Directory path where the file will be saved

required

Returns:

Type Description
None

The method saves the feature selection precisions and does not return any value

Source code in utils_reboot/utils.py
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
def save_fs_prec(precs:namedtuple,
                 path:str) -> None:

    """
    Store the feature selection precisions in a `pickle` file inside the given directory.

    Before saving, the fields of `precs` are copied into a `NewPrecisions` tuple together with
    `aucfs`: the NaN-ignoring sum of the difference between the row-wise NaN-ignoring means
    of `precs.inverse` and `precs.direct`.

    Args:
        precs: Feature selection precisions
        path: Directory path where the file will be saved

    Returns:
        The method saves the feature selection precisions and does not return any value

    """

    inverse_mean = np.nanmean(precs.inverse, axis=1)
    direct_mean = np.nanmean(precs.direct, axis=1)
    aucfs = np.nansum(inverse_mean - direct_mean)

    enriched = NewPrecisions(
        direct=precs.direct,
        inverse=precs.inverse,
        dataset=precs.dataset,
        model=precs.model,
        value=precs.value,
        aucfs=aucfs,
    )
    save_element(enriched, path, filetype="pickle")

save_fs_prec_random(precs, path)

Function to save the feature selection precisions for random features in a file (i.e. pickle file) in the specified directory path.

Parameters:

Name Type Description Default
precs namedtuple

Feature selection precisions for random features

required
path str

Directory path where the file will be saved

required

Returns:

Type Description
None

The method saves the feature selection precisions for random features and does not return any value

Source code in utils_reboot/utils.py
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
def save_fs_prec_random(precs:namedtuple,
                        path:str) -> None:

    """
    Store the feature selection precisions for random features in a `pickle` file inside the given directory.

    The `random`, `dataset` and `model` fields of `precs` are copied into a `Precisions_random`
    tuple, which is then handed to `save_element`.

    Args:
        precs: Feature selection precisions for random features
        path: Directory path where the file will be saved

    Returns:
        The method saves the feature selection precisions for random features and does not return any value
    """

    random_precs = Precisions_random(
        random=precs.random,
        dataset=precs.dataset,
        model=precs.model,
    )
    save_element(random_precs, path, filetype="pickle")

select_pre_process()

Function to select the pre-processing of the dataset asking the user to input the pre-processing number.

This method was specifically designed to construct the tutorial.ipynb notebook for the documentation.

Returns:

Type Description
bool

True if the user chose to pre-process the dataset (i.e. entered 1), False otherwise (i.e. entered 2)

Source code in utils_reboot/utils.py
346
347
348
349
350
351
352
353
354
355
356
357
358
359
def select_pre_process() -> bool:

    """
    Ask the user, through an input prompt, whether the dataset should be pre-processed.

    This method was specifically designed to construct the `tutorial.ipynb` notebook for the documentation.

    Returns:
        `True` when the user enters 1 (pre-process the dataset), `False` when the user enters 2
    """
    answer=input("Press 1 to pre process the dataset, 2 otherwise: ")
    choice=int(answer)
    assert choice in (1,2), "Input values not recognized: Accepted values: [1,2]"
    return choice==1

select_pre_process_scenario(dataset)

Combine the selection of the pre-processing of the dataset and the scenario for the experiment.

Parameters:

Name Type Description Default
dataset Type[Dataset]

Dataset to be used in the experiment

required

Returns:

Type Description
int

The selected scenario number (i.e. 1 for Scenario 1 and 2 for Scenario 2)

Source code in utils_reboot/utils.py
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
def select_pre_process_scenario(dataset:Type[Dataset]) -> int:

    """
    Combine the selection of the pre-processing of the dataset and the scenario for the experiment.

    The user is asked for both choices. For Scenario 2 the dataset is first split with
    `train_size=1-dataset.perc_outliers` and `contamination=0`. Then the dataset is either
    pre-processed or, if the user declined, initialised through `initialize_test` (Scenario 2)
    or `initialize_train_test` (Scenario 1). Finally the shapes of `X_train` and `X_test` are printed.

    Args:
        dataset: Dataset to be used in the experiment

    Returns:
        The selected scenario number (i.e. 1 for Scenario 1 and 2 for Scenario 2)
    """
    # select_pre_process returns a bool: True means "pre-process the dataset"
    pre_process=select_pre_process()
    scenario=select_scenario()

    if scenario==2:
        dataset.split_dataset(train_size=1-dataset.perc_outliers,contamination=0)

    if pre_process:
        dataset.pre_process()
        print("Dataset pre processed\n")
    elif scenario==2:
        print("Dataset not preprocessed\n")
        dataset.initialize_test()
    elif scenario==1:
        print("Dataset not preprocessed\n")
        dataset.initialize_train_test()

    print(f'Scenario: {scenario}\n')

    print(f'X_train shape: {dataset.X_train.shape}')
    print(f'X_test shape: {dataset.X_test.shape}')

    return scenario

select_scenario()

Function to select the scenario for the experiment (i.e. Scenario 1 or Scenario 2) asking the user to input the scenario number.

This method was specifically designed to construct the tutorial.ipynb notebook for the documentation.

Returns:

Type Description
int

The selected scenario number (i.e. 1 for Scenario 1 and 2 for Scenario 2)

Source code in utils_reboot/utils.py
331
332
333
334
335
336
337
338
339
340
341
342
343
344
def select_scenario() -> int:

    """
    Ask the user, through an input prompt, which scenario to use for the experiment (i.e. Scenario 1 or Scenario 2).

    This method was specifically designed to construct the `tutorial.ipynb` notebook for the documentation.

    Returns:
        The selected scenario number (i.e. 1 for Scenario 1 and 2 for Scenario 2)
    """

    answer=input("Press 1 for scenario 1 and 2 for scenario 2: ")
    chosen=int(answer)
    assert chosen in (1,2), "Scenario not recognized: Accepted values: [1,2]"
    return chosen