# Source code for immuneML.reports.train_ml_model_reports.MLSubseqPerformance
import warnings
from pathlib import Path
from immuneML.encodings.kmer_frequency.KmerFrequencyEncoder import KmerFrequencyEncoder
from immuneML.environment.Label import Label
from immuneML.hyperparameter_optimization.states.TrainMLModelState import TrainMLModelState
from immuneML.ml_methods.DeepRC import DeepRC
from immuneML.reports.train_ml_model_reports.MLSettingsPerformance import MLSettingsPerformance
class MLSubseqPerformance(MLSettingsPerformance):
    """
    Report for TrainMLModel: Similar to
    :py:obj:`~immuneML.reports.train_ml_model_reports.MLSettingsPerformance.MLSettingsPerformance`,
    this report plots the performance of certain combinations of encodings and ML methods.

    Similarly to MLSettingsPerformance, the performances are grouped by label (horizontal panels).
    However, the bar color is determined by the ml method class (thus several ML methods with different parameters
    may be grouped together) and the vertical panel grouping is determined by the subsequence size used for motif recovery.
    This subsequence size is either the k-mer size or the kernel size (DeepRC).

    This report can only be used to plot the results for setting combinations using k-mer encoding with continuous k-mers
    (in combination with any ML method), or DeepRC encoding + ml method.

    This report can only be used with TrainMLModel instruction under 'reports'.


    YAML specification:

    .. indent with spaces
    .. code-block:: yaml

        my_hp_report: MLSubseqPerformance

    """

    @classmethod
    def build_object(cls, **kwargs):
        """
        Create the report from keyword arguments (e.g. parsed from the YAML specification).

        Only the optional ``name`` entry is used; every other keyword argument is ignored.
        """
        return MLSubseqPerformance(kwargs.get("name"))

    def __init__(self, name: str = None, state: TrainMLModelState = None, label: Label = None, result_path: Path = None, number_of_processes: int = 1):
        super().__init__(name=name, state=state, label=label, result_path=result_path, number_of_processes=number_of_processes)
        # Set after the parent constructor so these values take precedence over whatever it assigned.
        # NOTE(review): presumably read by MLSettingsPerformance when building the plot and naming
        # the output files; confirm against the base class.
        self.vertical_grouping = "subsequence_size"
        self.result_name = "subseq_performance"

    def _get_vertical_grouping(self, assessment_item) -> str:
        """
        Return the vertical panel label for an assessment item.

        The label contains the DeepRC kernel size, the k-mer size 'k' of the encoder parameters,
        or "N/A" when neither applies.
        """
        hp_setting = assessment_item.hp_setting
        subseq_size = "N/A"

        if isinstance(hp_setting.ml_method, DeepRC):
            subseq_size = hp_setting.ml_method.kernel_size
        # NOTE(review): the encoder is recognised here by its module name, whereas
        # _check_valid_assessment_item recognises it by class name via
        # KmerFrequencyEncoder.dataset_mapping; confirm both checks accept the same encoders.
        elif hp_setting.encoder.__module__.endswith("KmerFrequencyEncoder"):
            subseq_size = hp_setting.encoder_params['k']

        return f"k-mer/kernel\nsize {subseq_size}"

    def _get_color_grouping(self, assessment_item) -> str:
        """Return the class name of the ML method, which determines the bar color."""
        return assessment_item.hp_setting.ml_method.__class__.__name__

    def _check_valid_assessment_item(self, assessment_item) -> bool:
        """
        Tell whether an assessment item can be plotted by this report.

        An item is valid if its ML method is DeepRC, or if its encoder class name is listed in
        KmerFrequencyEncoder.dataset_mapping and the 'sequence_encoding' encoder parameter is
        (case-insensitively) 'continuous_kmer' or 'imgt_continuous_kmer'.
        """
        hp_setting = assessment_item.hp_setting

        if isinstance(hp_setting.ml_method, DeepRC):
            return True

        if hp_setting.encoder.__class__.__name__ in KmerFrequencyEncoder.dataset_mapping.values():
            return hp_setting.encoder_params['sequence_encoding'].lower() in ('continuous_kmer', 'imgt_continuous_kmer')

        return False

    def _has_invalid_assessment_item(self) -> bool:
        """Return True if any assessment item stored in self.state fails _check_valid_assessment_item."""
        return any(not self._check_valid_assessment_item(assessment_item)
                   for assessment_state in self.state.assessment_states
                   for label_state in assessment_state.label_states.values()
                   for assessment_item in label_state.assessment_items.values())

    def check_prerequisites(self) -> bool:
        """
        Check whether the report can be generated, issuing a warning for every unmet prerequisite.

        The prerequisites are: a state is set, a result path is set, and every assessment item
        in the state is a supported encoder-ML method combination.

        Returns:
            True if the report can be created, False otherwise.
        """
        run_report = True
        report_name = self.__class__.__name__

        if self.state is None:
            warnings.warn(
                f"{report_name} can only be executed as a hyperparameter report. {report_name} report will not be created.")
            run_report = False

        if self.result_path is None:
            warnings.warn(f"{report_name} requires an output 'path' to be set. {report_name} report will not be created.")
            run_report = False

        # Without a state there are no assessment items to inspect; iterating over it would
        # raise an AttributeError instead of returning False.
        if self.state is not None and self._has_invalid_assessment_item():
            warnings.warn(f"{report_name} can only be used on encoder-ML method combinations that use k-mer encoding "
                          f"with continuous k-mers, or DeepRC. {report_name} report will not be created.")
            run_report = False

        return run_report