Source code for immuneML.dsl.instruction_parsers.LabelHelper

import logging

from immuneML.data_model.datasets.Dataset import Dataset
from immuneML.environment.LabelConfiguration import LabelConfiguration
from immuneML.util.ParameterValidator import ParameterValidator


class LabelHelper:
    """Static helpers that validate a `labels` list and turn it into a LabelConfiguration.

    Each entry of a `labels` list is either a plain label name (str) or a
    single-key dict of the form ``{label_name: {'positive_class': value}}``.
    """

    @staticmethod
    def _is_positive_class_spec(label: dict) -> bool:
        """Return True if `label` has the form ``{label_name: {'positive_class': value}}``."""
        if len(label) != 1:
            return False
        params = next(iter(label.values()))
        return isinstance(params, dict) and len(params) == 1 and 'positive_class' in params

    @staticmethod
    def check_label_format(labels: list, instruction_name: str, yaml_location: str):
        """Validate the structure of the `labels` list.

        Args:
            labels: list whose entries are label names (str) or single-key dicts
                mapping a label name to ``{'positive_class': value}``.
            instruction_name: name of the instruction; used as a prefix in error messages.
            yaml_location: location of the labels in the specification; used in error messages.

        Raises:
            AssertionError: if any entry is neither a str nor a dict, or if a dict
                entry is not of the form ``{label_name: {'positive_class': value}}``.
                ParameterValidator.assert_type_and_value is called first to check
                `labels` against `list` and may raise on its own.
        """
        ParameterValidator.assert_type_and_value(labels, list, instruction_name, f'{yaml_location}/labels')

        # Raised explicitly (instead of using `assert`) so that the validation is
        # not stripped when Python runs with -O; the exception type is unchanged.
        if not all(isinstance(label, (str, dict)) for label in labels):
            raise AssertionError(
                f"{instruction_name}: labels under {yaml_location} were not defined properly. The list of labels has to either be a list of "
                f"label names, or there can be a parameter 'positive_class' defined under the label name, for example:\n"
                f"labels: # one label with no positive class (T1D) and one with a positive class (CMV)\n"
                f"- T1D\n"
                f"- CMV: # when defining a positive class, make sure to use the correct indentation\n"
                f"    positive_class: True\n")

        dict_labels = [label for label in labels if isinstance(label, dict)]
        if not all(LabelHelper._is_positive_class_spec(label) for label in dict_labels):
            raise AssertionError(
                f"{instruction_name}: The only legal parameter under a label name is 'positive_class'. If 'positive_class' is not specified, please "
                f"remove the colon after the label name. ")

    @staticmethod
    def create_label_config(labels: list, dataset: Dataset, instruction_name: str, yaml_location: str) -> LabelConfiguration:
        """Build a LabelConfiguration from the `labels` list, taking label values from `dataset`.

        For each label, the values are taken from the first available source:
        ``dataset.labels[label_name]`` if `dataset.labels` is set and contains the
        label, otherwise the unique values returned by
        ``dataset.get_metadata([label_name])`` if the dataset has that method,
        otherwise an empty list (a warning is logged in that case).

        Args:
            labels: label specification; validated with check_label_format first.
            dataset: dataset from which the label values are read.
            instruction_name: name of the instruction; used in error and warning messages.
            yaml_location: location of the labels in the specification; used in messages.

        Returns:
            A LabelConfiguration with one label added per entry of `labels`.

        Raises:
            AssertionError: if `labels` is not in the expected format.
        """
        LabelHelper.check_label_format(labels, instruction_name, yaml_location)

        label_config = LabelConfiguration()

        for label in labels:
            if isinstance(label, str):
                label_name, positive_class = label, None
            else:
                # validated above: a dict with exactly one key holding {'positive_class': ...}
                label_name = next(iter(label))
                positive_class = label[label_name]['positive_class']

            if dataset.labels is not None and label_name in dataset.labels:
                label_values = list(dataset.labels[label_name])
            elif hasattr(dataset, "get_metadata"):
                # NOTE: going through set() removes duplicates, but the resulting order is arbitrary
                label_values = list(set(dataset.get_metadata([label_name])[label_name]))
            else:
                label_values = []
                logging.warning(f"{instruction_name}: for {yaml_location}, label values could not be recovered for label "
                                f"{label_name}, using empty list instead. This issue may occur due to improper loading of dataset {dataset.name}, "
                                f"and could cause problems with some encodings.")

            label_config.add_label(label_name, label_values, positive_class=positive_class)

        return label_config