Source code for carate.config_adapter.config

"""
Module for serialization and deserialization of inputs. The aim is to
keep a web-first attitude, even when files are used locally. If the
inputs are text files, they need to be converted first.

@author = Julian M. Kleber
"""
import torch
from typing import Type, Optional, Dict, TypeVar, Any, Generic

from amarium.utils import convert_str_to_bool

from carate.evaluation import base, classification, regression
from carate.models import (
    cgc_classification,
    cgc_regression,
    g_classification,
    g_regression,
    cc_classification,
    cc_regression,
    gcc_classification,
    gcc_regression,
    gatv2_classification,
    gatv2_regression,
    graph_transformer_classification,
    graph_transformer_regression,
)

from carate.loader.load_data import (
    DatasetObject,
    StandardPytorchGeometricDataset,
    StandardDatasetTUDataset,
    StandardDatasetMoleculeNet,
)
from carate.utils.convert_to_json import convert_py_to_json
from carate.logging.metrics_logger import MetricsLogger

# Maps the "evaluation" key of a configuration to the evaluation class that
# ConfigInitializer.from_json instantiates. The values are classes, not
# instances, hence Type[...].
EVALUATION_MAP: Dict[str, Type[base.Evaluation]] = {
    "regression": regression.RegressionEvaluation,
    "classification": classification.ClassificationEvaluation,
    "evaluation": base.Evaluation,
}

# Maps the "model" key of a configuration to the matching object imported
# from carate.models. Every model imported at the top of this file is
# registered here, so each of them can be selected by name.
MODEL_MAP: Dict[str, Any] = {
    "cgc_classification": cgc_classification,
    "cgc_regression": cgc_regression,
    "g_classification": g_classification,
    "g_regression": g_regression,
    "cc_classification": cc_classification,
    "cc_regression": cc_regression,
    "gcc_classification": gcc_classification,
    "gcc_regression": gcc_regression,
    "gatv2_classification": gatv2_classification,
    "gatv2_regression": gatv2_regression,
    "graph_transformer_classification": graph_transformer_classification,
    "graph_transformer_regression": graph_transformer_regression,
}

# Maps the "data_set" key of a configuration to the data set class that
# ConfigInitializer.from_json instantiates.
DATA_SET_MAP: Dict[
    str,
    Type[StandardDatasetMoleculeNet]
    | Type[StandardPytorchGeometricDataset]
    | Type[StandardDatasetTUDataset],
] = {
    "StandardPyG": StandardPytorchGeometricDataset,
    "StandardTUD": StandardDatasetTUDataset,
    "StandardMoleculeNet": StandardDatasetMoleculeNet,
}



[docs]
class Config:
    """
    Object representation of the configuration of a model run.

    The class is a middle layer between some user input and the run
    interface: every setting passed to the constructor is stored as an
    attribute of the same name. ``ConfigInitializer.from_file`` and
    ``ConfigInitializer.from_json`` build instances from a configuration
    file or from an already parsed mapping, which also makes the class
    usable from the web.

    :author: Julian M. Kleber
    """

    def __init__(
        self,
        dataset_name: str,
        num_features: int,
        num_classes: int,
        result_save_dir: str,
        model_save_freq: int,
        Evaluation: base.Evaluation,
        data_set: DatasetObject,
        model: Any,
        logger: Any,
        optimizer: str,
        device: str | torch.device = "auto",
        net_dimension: int = 364,
        learning_rate: float = 0.0005,
        dataset_save_path: str = ".",
        test_ratio: int = 20,
        batch_size: int = 64,
        shuffle: bool = True,
        num_cv: int = 5,
        num_epoch: int = 150,
        override: bool = True,
        resume: bool = False,
        normalize: bool = False,
        num_heads: int = 3,
        dropout_gat: float = 0.6,
        dropout_forward: float = 0.5,
        custom_size: Optional[int] = None,
    ):
        """
        Store the given settings on the instance.

        Every argument is stored unchanged under its own name, except
        ``normalize``, which is coerced with ``bool``.

        :param dataset_name: Name of the data set.
        :param num_features: Number of input features handed to the model.
        :param num_classes: Number of classes / outputs handed to the model.
        :param result_save_dir: Directory for the results.
        :param model_save_freq: Frequency with which the model is saved
            (presumably in epochs -- confirm against the evaluation code).
        :param Evaluation: Evaluation object used for the run.
        :param data_set: Data set object used for the run.
        :param model: Model entry selected for the run (a ``MODEL_MAP``
            value when built via ``ConfigInitializer``).
        :param logger: Logger object; ``ConfigInitializer`` passes a
            ``MetricsLogger``.
        :param optimizer: Name of the optimizer.
        :param device: Device to run on. ``ConfigInitializer.from_json``
            passes an already resolved ``torch.device``; the plain string
            default is stored as given.
        :param net_dimension: Model hyperparameter, stored as given.
        :param learning_rate: Learning rate, stored as given.
        :param dataset_save_path: Location where the data set is stored.
        :param test_ratio: Test split setting (presumably a percentage --
            confirm against the data set loader).
        :param batch_size: Batch size.
        :param shuffle: Whether the data is shuffled.
        :param num_cv: Number of cross-validation runs.
        :param num_epoch: Number of epochs.
        :param override: Override flag, stored as given.
        :param resume: Resume flag, stored as given.
        :param normalize: Normalization flag, stored as ``bool(normalize)``.
        :param num_heads: Model hyperparameter, stored as given.
        :param dropout_gat: Model hyperparameter, stored as given.
        :param dropout_forward: Model hyperparameter, stored as given.
        :param custom_size: Optional custom size, ``None`` when unset.
        """

        # modelling
        self.model = model
        self.optimizer = optimizer
        self.device = device
        self.Evaluation = Evaluation
        self.data_set = data_set
        self.normalize = bool(normalize)

        # model parameters
        self.num_classes = num_classes
        self.num_features = num_features
        self.net_dimension = net_dimension
        self.num_heads = num_heads
        self.dropout_gat = dropout_gat
        self.dropout_forward = dropout_forward

        # evaluation parameters
        self.result_save_dir = result_save_dir
        self.model_save_freq = model_save_freq
        self.override = override

        # training
        self.resume = resume
        self.learning_rate = learning_rate
        self.test_ratio = test_ratio
        self.batch_size = batch_size
        self.custom_size = custom_size
        self.num_cv = num_cv
        self.num_epoch = num_epoch

        # data
        self.dataset_name = dataset_name
        self.dataset_save_path = dataset_save_path
        self.shuffle = shuffle
        self.logger = logger




[docs]
class ConfigInitializer:
    """
    Factory for ``Config`` objects.

    Provides alternate constructors that build a ``Config`` either from a
    configuration file (``from_file``) or from an already parsed mapping
    (``from_json``).
    """

    @classmethod
    def from_file(cls, file_name: str) -> Config:
        """
        Build a ``Config`` from a configuration file.

        The file is converted with ``convert_py_to_json`` and the result is
        handed to ``from_json``.

        :param file_name: Name of the configuration file.
        :return: The resulting ``Config`` object.

        :doc-author: Julian M. Kleber
        """

        json_object = convert_py_to_json(file_name)
        # Dispatch through cls so that subclasses overriding from_json are
        # honoured.
        return cls.from_json(json_object=json_object)

    @classmethod
    def from_json(cls, json_object: Dict[Any, Any]) -> Config:
        """
        Build a ``Config`` from a mapping of settings, for example::

            config = ConfigInitializer.from_json(json_object)

        Required keys: ``data_set``, ``evaluation``, ``model``,
        ``optimizer``, ``dataset_name``, ``dataset_save_path``,
        ``test_ratio``, ``batch_size``, ``shuffle``, ``num_classes``,
        ``num_features``, ``net_dimension``, ``learning_rate``, ``num_cv``,
        ``num_epoch``, ``result_save_dir``, ``model_save_freq`` and
        ``override``.

        Optional keys and their fallbacks: ``device`` (``"auto"``),
        ``resume`` (``False``), ``normalize`` (``False``), ``custom_size``
        (``None``), ``num_heads`` (``3``), ``dropout_forward`` (``0.5``),
        ``dropout_gat`` (``0.6``) and ``log_save_dir`` (the value of
        ``result_save_dir``).

        The passed mapping is not modified.

        :param json_object: Mapping holding the configuration.
        :return: The resulting ``Config`` object.
        :raises KeyError: If a required key is missing, or if the
            ``data_set``, ``evaluation`` or ``model`` value is not
            registered in the corresponding map.

        :doc-author: Julian M. Kleber
        """

        # "cpu" and "cuda" are taken literally; "auto", a missing key and any
        # other value all select cuda when it is available, otherwise cpu.
        requested_device = json_object.get("device", "auto")
        if requested_device in ("cpu", "cuda"):
            device = torch.device(requested_device)
        else:
            device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

        # Optional settings; the fallbacks mirror the defaults of Config.
        resume = json_object.get("resume", False)
        normalize = json_object.get("normalize", False)
        custom_size = json_object.get("custom_size", None)
        num_heads = json_object.get("num_heads", 3)
        dropout_forward = json_object.get("dropout_forward", 0.5)
        dropout_gat = json_object.get("dropout_gat", 0.6)

        # Logs are written next to the results unless a dedicated directory
        # is configured.
        if "log_save_dir" in json_object:
            log_save_dir = json_object["log_save_dir"]
        else:
            log_save_dir = json_object["result_save_dir"]

        metrics_logger = MetricsLogger(log_save_dir)
        metrics_logger.logger.info("Initializing configuration for the config file ")
        metrics_logger.logger.info("The configuration is: " + str(json_object))

        data_set = DATA_SET_MAP[json_object["data_set"]](
            dataset_save_path=json_object["dataset_save_path"],
            dataset_name=json_object["dataset_name"],
            test_ratio=json_object["test_ratio"],
            batch_size=json_object["batch_size"],
            shuffle=json_object["shuffle"],
        )

        evaluation = EVALUATION_MAP[json_object["evaluation"]](
            dataset_name=json_object["dataset_name"],
            dataset_save_path=json_object["dataset_save_path"],
            test_ratio=json_object["test_ratio"],
            model_net=json_object["model"],
            optimizer=json_object["optimizer"],
            data_set=data_set,
            result_save_dir=json_object["result_save_dir"],
            model_save_freq=json_object["model_save_freq"],
            device=device,
            resume=resume,
            logger=metrics_logger,
        )

        # Kept in a local so that the caller's mapping is left untouched.
        override = convert_str_to_bool(json_object["override"])

        # NOTE(review): bool() of any non-empty string is True, so a textual
        # "False" for shuffle or normalize would be read as True. Only
        # override goes through convert_str_to_bool -- confirm which value
        # types convert_py_to_json produces.
        return Config(
            model=MODEL_MAP[json_object["model"]],
            optimizer=json_object["optimizer"],
            device=device,
            Evaluation=evaluation,
            data_set=data_set,
            # model parameters
            dataset_name=str(json_object["dataset_name"]),
            num_classes=int(json_object["num_classes"]),
            num_features=int(json_object["num_features"]),
            net_dimension=int(json_object["net_dimension"]),
            learning_rate=float(json_object["learning_rate"]),
            # evaluation parameters
            dataset_save_path=str(json_object["dataset_save_path"]),
            test_ratio=int(json_object["test_ratio"]),
            batch_size=int(json_object["batch_size"]),
            shuffle=bool(json_object["shuffle"]),
            num_cv=int(json_object["num_cv"]),
            num_epoch=int(json_object["num_epoch"]),
            result_save_dir=str(json_object["result_save_dir"]),
            model_save_freq=int(json_object["model_save_freq"]),
            override=override,
            resume=resume,
            normalize=bool(normalize),
            num_heads=num_heads,
            dropout_forward=dropout_forward,
            dropout_gat=dropout_gat,
            custom_size=custom_size,
            logger=metrics_logger,
        )