Source code for carate.evaluation.base

"""
This is the heart of the application and trains / tests a algorithm on a given dataset.
The idea is to parametrize as much as possible.

:author: Julian M. Kleber
"""
from typing import Type, Any, Dict, Optional
import json
import numpy as np

import logging

from sklearn import metrics
import torch
import torch.nn.functional as F
from amarium.utils import check_make_dir, prepare_file_name_saving

from carate.utils.model_files import (
    save_model_training_checkpoint,
    save_model_parameters,
    load_model_training_checkpoint,
)

from carate.loader.load_data import (
    DatasetObject,
)
from carate.default_interface import DefaultObject
from carate.models.base_model import Model





[docs]
class Evaluation(DefaultObject):

    """
    The evaluation class is about evaluating a given model written in PyTorch or PyTorchGeometric.
    """

    name = "Default evaluation"

    def __init__(
        self,
        dataset_name: str,
        dataset_save_path: str,
        result_save_dir: str,
        model_net: Model,
        optimizer: torch.optim.Optimizer,
        data_set: DatasetObject,
        device: torch.device,
        logger: Any, 
        resume: bool,
        test_ratio: int,
        num_epoch: int = 150,
        num_cv: int = 5,
        num_classes: int = 2,
        out_dir: str = r"./out",
        batch_size: int = 64,
        shuffle: bool = True,
        model_save_freq: int = 100,
        override: bool = True,
        normalize: bool = False,
        custom_size: Optional[int] = None,
    ) -> None:
        """

        :param self: Used to Refer to the object instance itself, and is used to
        access variables that belongs to the class.
        :param model: Used to Specify the model that will be trained.
        :param optimizer: Used to Define the optimizer that will be used
        to train the model.
        :param data_set:Type[DatasetObject]: Used to Specify the type of data loader
        that is used. Is implemented according to the interface given in load_data.py
        by the class DatasetObject.load_data().

        :param epoch:int=150: Used to Set the number of epochs to train for.
        :param num_cv:int=5: Used to Specify the number of cross validations
        that will be used in the training process.
        :param num_classes:int=2: Used to Define the number of classes in the dataset.
        :param out_dir:str="out": Used to Specify the directory where the output of your training will be stored.
        :return: The following:.

        :doc-author: Julian M. Kleber
        """
        # model
        self.model_net = model_net
        self.num_classes = num_classes

        # training

        self.num_cv = num_cv
        self.optimizer = optimizer
        self.batch_size = batch_size
        self.test_ratio = test_ratio
        self.num_epoch = num_epoch
        self.result_save_dir = result_save_dir
        self.model_save_freq = model_save_freq
        self.override = override
        self.train_store = None
        self.resume = resume

        # data

        self.dataset_name = dataset_name
        self.dataset_save_path = dataset_save_path
        self.logger = logger
        self.out_dir = out_dir
        self.data_set = data_set
        self.shuffle = shuffle
        self.custom_size = custom_size
        self.normalize = normalize

        # hardware

        self.device = device
        

[docs]
    def cv(
        self,
        num_cv: int,
        num_epoch: int,
        num_classes: int,
        dataset_name: str,
        dataset_save_path: str,
        logger: Any, 
        test_ratio: int,
        resume: bool,
        data_set: DatasetObject,
        shuffle: bool,
        batch_size: int,
        model_net: Model,
        optimizer: torch.optim.Optimizer,
        device: torch.device,
        result_save_dir: str,
        model_save_freq: int,
        override: bool = True,
        normalize:bool = False,
        custom_size: Optional[int] = None,
    ) -> Dict[str, Any]:
        """
        The function is the core of the evaluation. The results are saved on disk during
        the run and returned as json at the end of the run.

        :param self: Used to Represent the instance of the class.
        :param num_cv:int: Used to specify the number of cross-validation folds.
        :param num_epoch:int: Used to Specify the number of epochs to train for.
        :param num_classes:int: Used to Determine the number of classes in the dataset.
        :param dataset_name:str: Used to Specify the name of the dataset to be used.
        :param DataSetType[DatasetObject]: Used to Load the data.
        :param : Used to Specify the number of folds in a (stratified)kfold,.
        :return: A list of dictionaries.

        :doc-author: Trelent
        """

        (
            num_cv,
            num_epoch,
            num_classes,
            dataset_name,
            dataset_save_path,
            logger,
            test_ratio,
            resume,
            data_set,
            shuffle,
            batch_size,
            model_net,
            optimizer,
            device,
            result_save_dir,
            model_save_freq,
            override,
            normalize,
            custom_size,
        ) = self._get_defaults(locals())
        result = []

        tmp = {}
        save_model_parameters(model_net, save_dir=result_save_dir)
        logging.info("Starting " + str(num_cv) +  " CVs for "+ dataset_name +
                            " with data_stored in " + dataset_save_path )
        for i in range(num_cv):
            (
                test_dataset,
                train_dataset,
                test_loader,
                train_loader,
                loaded_dataset,
            ) = data_set.load_data(
                dataset_name=dataset_name,
                dataset_save_path=dataset_save_path,
                test_ratio=test_ratio,
                batch_size=batch_size,
                shuffle=shuffle,
                custom_size=custom_size,
            )
            # storage containers
            logging.info("Starting CV "+str(i))
            acc_store_train = []
            acc_store_test = []
            auc_store = []
            loss_store = []

            for epoch in range(1, num_epoch + 1):
                train_accuracy, train_loss = self.train(
                    epoch=epoch,
                    model_net=model_net,
                    device=device,
                    optimizer=optimizer,
                    train_loader=train_loader,
                    num_classes=num_classes,
                )
                loss_store.append(train_loss.cpu().tolist())
                train_acc = self.test(
                    train_loader,
                    device=device,
                    model_net=model_net,
                    test=False,
                )  # test False for storing the results
                test_acc, self.train_store = self.test(
                    test_loader,
                    device=device,
                    model_net=model_net,
                    epoch=epoch,
                    test=True,
                )
                acc_store_train.append(train_acc.cpu().tolist())
                acc_store_test.append(test_acc.cpu().tolist())
                logger.log({
                    "Epoch": epoch,
                    "Train_ACC": "{:16f}".format(train_acc),
                    "Train_Loss": "{:16f}".format(train_loss),
                    "Test_Acc": "{:16f}".format(test_acc), 
                })
                preds = np.zeros((len(test_dataset)))
                features = self.train_store

                for j in range(len(test_dataset)):
                    preds[j] = test_dataset[j].y

                preds = torch.as_tensor(preds)
                preds = F.one_hot(preds.long(), num_classes=num_classes)
                store_auc = []

                for j in range(len(features[0, :])):
                    auc = metrics.roc_auc_score(preds[:, j], features[:, j])
                    logging.info("AUC of " + str(j) + "is:" + str(auc))
                    store_auc.append(auc)

                auc_store.append(store_auc)

                tmp["Loss"] = list(loss_store)
                tmp["Acc_train"] = list(acc_store_train)
                tmp["Acc_test"] = list(acc_store_test)
                tmp["AUC"] = list(auc_store)

                if epoch % model_save_freq == 0:
                    self.save_whole_checkpoint(
                        result_save_dir=result_save_dir,
                        dataset_name=dataset_name,
                        num_cv=i,
                        num_epoch=epoch,
                        model_net=model_net,
                        data=tmp,
                        optimizer=optimizer,
                        loss=train_loss,
                        override=override,
                    )
            result.append(tmp)
        return result



[docs]
    def train(
        self,
        epoch: int,
        model_net: Model,
        device: torch.device,
        train_loader: Type[torch.utils.data.DataLoader],
        optimizer: torch.optim.Optimizer,
        num_classes: int,
    ):
        """
        The train function is used to train the model.
        The function takes in a number of epochs and a model,
        and returns the accuracy on the test set.

        :param epoch: Used to Determine when to stop training.
        :param model: Used to Pass the model to the function.
        :param device: Used to Tell the model which device to use.
        :param train_loader: Used to Load the training data.
        :param test_loader: Used to Evaluate the model on the test data.
        :param optimizer: Used to Specify the optimizer that will be used in training.
        :param num_classes=2: Used to Specify the number of classes in the data.
        :param shrikage=51: Used to Make sure that the model is trained for at least 51 epochs.
        :return: The accuracy of the model on the training set.

        :doc-author: Trelent
        """
        model_net.train()

        correct = 0
        for data in train_loader:
            data.x = data.x.type(torch.FloatTensor)
            data.y = F.one_hot(data.y.long(), num_classes=num_classes).type(
                torch.FloatTensor
            )
            data = data.to(device)
            optimizer.zero_grad()
            output_probs = model_net(data.x, data.edge_index, data.batch)
            output = (output_probs > 0.5).float()
            loss = torch.nn.BCELoss()
            loss = loss(output_probs, data.y)
            loss.backward()
            optimizer.step()
            correct += (output == data.y).float().sum() / num_classes
        accuracy = correct / len(train_loader.dataset)
        return accuracy, loss



[docs]
    def test(
        self,
        test_loader: torch.utils.data.DataLoader,
        model_net: Model,
        device: torch.device,
        **kwargs: Any,
    ) -> Any:
        """
        The test function is used to test the model on a dataset.
        It returns the accuracy of the model on that dataset calculated as
        the average of the atomic accuracy for each batch in the Dataset

        :param test_loader: Used to pass the test data loader.
        :param epoch: Used to keep track of the current epoch.
        :param model_net: Used to pass the model to the test function.
        :param device: Used to tell torch which device to use.
        :param test=False: Used to distinguish between training
        and testing.
        :return: The accuracy of the model on the test data.

        :doc-author: Julian M. Kleber
        """
        test = bool(kwargs["test"])
        model_net.eval()
        correct = 0
        if test:
            outs = []
        for data in test_loader:
            data.x = data.x.type(torch.FloatTensor)
            data = data.to(device)
            output_probs = model_net(data.x, data.edge_index, data.batch)
            output = (output_probs > 0.5).float()
            correct += (torch.argmax(output, dim=1) == data.y).float().sum()
            if test:
                outs.append(output.cpu().detach().numpy())
        if test:
            outputs = np.concatenate(outs, axis=0).astype(float)
            return (
                correct / len(test_loader.dataset),
                outputs,
            )  # TODO this is from some "quick experiment" not sure if the line and functionality is
        return correct / len(test_loader.dataset)



[docs]
    def save_result(
        self,
        result_save_dir: str,
        dataset_name: str,
        num_cv: int,
        num_epoch: int,
        data: dict,
    ) -> None:
        """
        The save_result function saves the results of a cross-validation run to a .json file. The goal is to provide
        a json interface of cv results for later analysis of the training runs.


        :param self: Used to represent the instance of the class.
        :param result_save_dir:str: Used to specify the directory where the results will be saved.
        :param dataset_name:str: Used to identify the dataset.
        :param num_cv:int: Used to specify the number of cross validation runs.
        :param num_epoch int: Epoch the run was saved in
        :param data:dict: Used to store the results of each cross validation run.
        :return: None.

        :doc-author: Julian M. Kleber
        """

        prefix = result_save_dir + "/data/" + "CV_" + str(num_cv)
        file_name = prepare_file_name_saving(
            prefix=prefix,
            file_name=dataset_name,
            suffix=".json",
        )
        with open(file_name, "w") as f:
            json.dump(data, f)
            logging.info(
                "Saved"
                + str(num_epoch)
                + "of cv"
                + str(num_cv)
                + " run to "
                + result_save_dir
                + dataset_name
                + "_"
                + str(num_cv)
                + ".csv"
            )



[docs]
    def save_whole_checkpoint(
        self,
        result_save_dir: str,
        dataset_name: str,
        num_cv: int,
        num_epoch: int,
        model_net: Type[torch.nn.Module],
        data: dict,
        optimizer: Type[torch.optim.Optimizer],
        loss: float,
        override: bool = True,
    ) -> None:
        """
        The save_whole_checkpoint function saves the model checkpoint and
        results for a given epoch.

        The save_whole_checkpoint function saves the model checkpoint
        and results for a given epoch. It is called by the train function
        in order to save checkpoints at regular intervals during training,
        as well as after each cross-validation fold has been trained on.
        The saved files are used to resume training if it is interrupted,
        or to evaluate performance of different models on test data without
        having to retrain them from scratch.

        :param self: Used to Represent the instance of the class.
        :param result_save_dir:str: Used to Specify the directory where
        the checkpoint will be saved.
        :param dataset_name:str: Used to Name the dataset.
        :param num_cv:int: Used to Specify the cross validation number.
        :param num_epoch:int: Used to Specify the number of epochs that have
        been completed.
        :param model_net:Type[torch.nn.Module]: Used to Save the model.
        :param data:dict: Used to Save the data, which is a dictionary containing
        the training and validation data.
        :param optimizer:Type[torch.optim.Optimizer]: Used to Save the optimizer
        state.
        :param loss:float: Used to Save the loss value.
        :param override:bool=True: Used to Override the previous checkpoint.
        :param : Used to Save the model.
        :return: None.

        :doc-author: Julian M. Kleber
        """

        self.save_model_checkpoint(
            result_save_dir=result_save_dir,
            dataset_name=dataset_name,
            num_cv=num_cv,
            num_epoch=num_epoch,
            model_net=model_net,
            optimizer=optimizer,
            loss=loss,
            override=override,
        )

        self.save_result(
            result_save_dir=result_save_dir,
            dataset_name=dataset_name,
            data=data,
            num_cv=num_cv,
            num_epoch=num_epoch,
        )
        logging.info(
            f"Successfully saved a checkpoint for epoch {num_epoch} in CV {num_cv}"
        )



[docs]
    def save_model_checkpoint(
        self,
        result_save_dir: str,
        dataset_name: str,
        num_cv: int,
        num_epoch: int,
        model_net: Type[torch.nn.Module],
        optimizer: Type[torch.optim.Optimizer],
        loss: float,
        override: bool = True,
    ) -> None:
        """
        The save_model function saves the model to a file.

        The save_model function saves the model to a file. The filename is based on
        the dataset name, number of cross-validation folds, and epoch number. The
        file is saved in the result_save_dir directory with an extension of .pt (for
        PyTorch). If this directory does not exist, it will be created before saving
        the file.

        :param result_save_dir:str: Used to specify the directory where the model will be saved.
        :param dataset_name:str: Used to save the model with a name that includes the dataset it was trained on.
        :param num_cv:int: Used to specify which cross validation fold the model is being saved for.
        :param num_epoch:int: Used to save the model at a certain epoch.
        :param model_net:Type[torch.nn.Module]: Used to save the model.
        :param : Used to save the model at a certain frequency.
        :return: None.

        :doc-author: Julian M. Kleber
        """

        save_model_training_checkpoint(
            result_save_dir=result_save_dir,
            dataset_name=dataset_name,
            num_cv=num_cv,
            num_epoch=num_epoch,
            model_net=model_net,
            optimizer=optimizer,
            loss=loss,
            override=override,
        )



[docs]
    def load_model_checkpoint(
        self,
        checkpoint_path: str,
        model_net: Model,
        optimizer=torch.optim.Optimizer,
    ) -> Model:
        """
        The load_model_checkpoint function loads a model checkpoint from the specified path.

        The function loads a model checkpoint from the specified path, and sets it as the
        model of this evaluation object. The function also returns that loaded model.

        :param self: Used to Refer to the object itself.
        :param checkpoint_path:str: Used to Specify the path to the checkpoint file.
        :param model_net:Model: Used to Specify the model that is being loaded.
        :param optimizer=torch.optim.Optimizer: Used to Load the optimizer state
        from a checkpoint.
        :param : Used to Load the model checkpoint.
        :return: The model.

        :doc-author: Julian M. Kleber
        """

        model_net_cp = load_model_training_checkpoint(
            checkpoint_path=checkpoint_path, model_net=model_net, optimizer=optimizer
        )
        self.model_net = (
            model_net_cp  # set the model of the evaluation object to the checkpoint
        )
        return model_net_cp


    def __str__(self):
        return "Evaluation for " + str(self.model_net) + " with the " + self.name

    def __repr__(self):
        return "Standard Evaluation Object"