Source code for immuneML.reports.train_ml_model_reports.MLSubseqPerformance

import warnings
from pathlib import Path

from immuneML.encodings.kmer_frequency.KmerFrequencyEncoder import KmerFrequencyEncoder
from immuneML.environment.Label import Label
from immuneML.hyperparameter_optimization.states.TrainMLModelState import TrainMLModelState
from immuneML.ml_methods.DeepRC import DeepRC
from immuneML.reports.train_ml_model_reports.MLSettingsPerformance import MLSettingsPerformance


class MLSubseqPerformance(MLSettingsPerformance):
    """
    Report for TrainMLModel: Similar to
    :py:obj:`~immuneML.reports.train_ml_model_reports.MLSettingsPerformance.MLSettingsPerformance`,
    this report plots the performance of certain combinations of encodings and ML methods.

    Similarly to MLSettingsPerformance, the performances are grouped by label (horizontal panels).
    However, the bar color is determined by the ml method class (thus several ML methods with different parameters
    may be grouped together) and the vertical panel grouping is determined by the subsequence size used for motif recovery.
    This subsequence size is either the k-mer size or the kernel size (DeepRC).

    This report can only be used to plot the results for setting combinations using k-mer encoding with continuous k-mers
    (in combination with any ML method), or DeepRC encoding + ml method.

    This report can only be used with TrainMLModel instruction under 'reports'.


    YAML specification:

    .. indent with spaces
    .. code-block:: yaml

        my_hp_report: MLSubseqPerformance

    """

    @classmethod
    def build_object(cls, **kwargs):
        """Create the report from keyword arguments; only the optional ``name`` key is used."""
        return MLSubseqPerformance(kwargs.get("name"))

    def __init__(self, name: str = None, state: TrainMLModelState = None, label: Label = None, result_path: Path = None, number_of_processes: int = 1):
        """Forward all arguments to the parent report and set the grouping key and result name of this report."""
        super().__init__(name=name, state=state, label=label, result_path=result_path, number_of_processes=number_of_processes)
        self.vertical_grouping = "subsequence_size"
        self.result_name = "subseq_performance"

    @staticmethod
    def _uses_kmer_frequency_encoder(encoder) -> bool:
        """
        Return True if ``encoder`` is recognised as a k-mer frequency encoder.

        Two signals are accepted so that the validity check and the panel label
        always agree on what counts as a k-mer encoder: the encoder's class name
        being listed in ``KmerFrequencyEncoder.dataset_mapping``, or the encoder's
        module name ending in ``KmerFrequencyEncoder``.
        """
        if encoder.__class__.__name__ in KmerFrequencyEncoder.dataset_mapping.values():
            return True
        return encoder.__module__.endswith("KmerFrequencyEncoder")

    def _get_vertical_grouping(self, assessment_item):
        """
        Build the vertical panel label for one assessment item.

        The label reports the DeepRC kernel size when the ML method is DeepRC,
        the ``k`` encoder parameter when a k-mer frequency encoder is used,
        and "N/A" otherwise (or when ``k`` is not present in the encoder params).
        """
        hp_setting = assessment_item.hp_setting
        subseq_size = "N/A"

        if isinstance(hp_setting.ml_method, DeepRC):
            subseq_size = hp_setting.ml_method.kernel_size
        elif self._uses_kmer_frequency_encoder(hp_setting.encoder):
            # .get keeps the report usable if 'k' was not spelled out in the params
            subseq_size = hp_setting.encoder_params.get('k', "N/A")

        return f"k-mer/kernel\nsize {subseq_size}"

    def _get_color_grouping(self, assessment_item):
        """Return the class name of the ML method, which determines the bar color."""
        return assessment_item.hp_setting.ml_method.__class__.__name__

    def _check_valid_assessment_item(self, assessment_item):
        """
        Return True if the assessment item can be plotted by this report.

        An item is valid when its ML method is DeepRC, or when it uses a k-mer
        frequency encoder whose ``sequence_encoding`` parameter is
        ``continuous_kmer`` or ``imgt_continuous_kmer`` (case-insensitive).
        """
        hp_setting = assessment_item.hp_setting

        if isinstance(hp_setting.ml_method, DeepRC):
            return True

        if self._uses_kmer_frequency_encoder(hp_setting.encoder):
            sequence_encoding = hp_setting.encoder_params['sequence_encoding'].lower()
            return sequence_encoding in ('continuous_kmer', 'imgt_continuous_kmer')

        return False

    def _iter_assessment_items(self):
        """Yield every assessment item of every label state of every assessment state in ``self.state``."""
        for assessment_state in self.state.assessment_states:
            for label_state in assessment_state.label_states.values():
                yield from label_state.assessment_items.values()

    def check_prerequisites(self):
        """
        Check whether the report can be generated and warn about every reason it cannot.

        Returns:
            bool: False if the state is missing, the result path is missing, or at
            least one assessment item is not a supported encoder/ML method
            combination; True otherwise.
        """
        report_name = self.__class__.__name__
        run_report = True

        if self.state is None:
            warnings.warn(
                f"{report_name} can only be executed as a hyperparameter report. {report_name} report will not be created.")
            run_report = False

        if self.result_path is None:
            warnings.warn(f"{report_name} requires an output 'path' to be set. {report_name} report will not be created.")
            run_report = False

        # The assessment items can only be inspected when a state is available;
        # without this guard a missing state raised AttributeError instead of returning False.
        if self.state is not None:
            all_valid = all(self._check_valid_assessment_item(item) for item in self._iter_assessment_items())
            if not all_valid:
                # a single warning is enough, regardless of how many items are invalid
                warnings.warn(f"{report_name} can only be used on encoder-ML method combinations that use k-mer encoding "
                              f"with continuous k-mers, or DeepRC. {report_name} report will not be created.")
                run_report = False

        return run_report