Source code for immuneML.hyperparameter_optimization.config.ReportConfig

from scripts.specification_util import update_docs_per_mapping



[docs]
class ReportConfig:
    """
    A class encapsulating different report lists which can be executed while performing nested cross-validation (CV) using TrainMLModel
    instruction. All arguments are optional.

    Arguments:

        data (dict): :ref:`Data reports` to be executed on the whole dataset before it is split to training/test or training/validation

        data_splits (dict): :ref:`Data reports` to be executed after the data has been split into training and test (assessment CV loop) or training and validation (selection CV loop) datasets before they are encoded

        models (dict): :ref:`ML model reports` to be executed on all trained classifiers

        encoding (dict): :ref:`Encoding reports` to be executed on each of the encoded training/test datasets or training/validation datasets

    YAML specification:

    .. indent with spaces
    .. code-block:: yaml

        # as a part of a TrainMLModel instruction, defining the outer (assessment) loop of nested cross-validation:
        assessment: # outer loop of nested CV
            split_strategy: random # perform Monte Carlo CV (randomly split the data into train and test)
            split_count: 5 # how many train/test datasets to generate
            training_percentage: 0.7 # what percentage of the original data should be used for the training set
            reports: # reports to execute on training/test datasets, encoded datasets and trained ML methods
                data_splits: # list of reports to execute on training/test datasets (before they are preprocessed and encoded)
                    - my_data_split_report
                encoding: # list of reports to execute on encoded training/test datasets
                    - my_encoding_report

        # as a part of a TrainMLModel instruction, defining the inner (selection) loop of nested cross-validation:
        selection: # inner loop of nested CV
            split_strategy: random # perform Monte Carlo CV (randomly split the data into train and validation)
            split_count: 5 # how many train/validation datasets to generate
            training_percentage: 0.7 # what percentage of the original data should be used for the training set
            reports: # reports to execute on training/validation datasets, encoded datasets and trained ML methods
                data_splits: # list of reports to execute on training/validation datasets (before they are preprocessed and encoded)
                    - my_data_split_report
                encoding: # list of reports to execute on encoded training/validation datasets
                    - my_encoding_report
                models:
                    - my_ml_model_report

    """

    def __init__(self, data_splits: dict = None, models: dict = None, data: dict = None, encoding: dict = None):
        """Store the four report mappings; any argument left as None becomes a new empty dict.

        A dict that is passed in is stored as-is (not copied).
        """
        # NOTE: the class docstring above is read at runtime by get_documentation(),
        # so its wording is part of the program's output.
        self.data_split_reports = {} if data_splits is None else data_splits
        self.encoding_reports = {} if encoding is None else encoding
        self.model_reports = {} if models is None else models
        self.data_reports = {} if data is None else data


[docs]
    @staticmethod
    def get_documentation():
        """Return the ReportConfig class docstring after processing by update_docs_per_mapping.

        The mapping pairs each "<name> (dict)" argument label found in the class
        docstring with the bare argument name. Presumably the helper rewrites
        those labels in the text; confirm against scripts.specification_util.
        """
        arg_label_to_name = {
            "data_splits (dict)": "data_splits",
            "models (dict)": "models",
            "data (dict)": "data",
            "encoding (dict)": "encoding"
        }
        return update_docs_per_mapping(str(ReportConfig.__doc__), arg_label_to_name)