Source code for immuneML.dsl.instruction_parsers.LabelHelper

import logging

from immuneML.data_model.datasets.Dataset import Dataset
from immuneML.environment.LabelConfiguration import LabelConfiguration
from immuneML.util.ParameterValidator import ParameterValidator



[docs]
class LabelHelper:


[docs]
    @staticmethod
    def check_label_format(labels: list, instruction_name: str, yaml_location: str):
        ParameterValidator.assert_type_and_value(labels, list, instruction_name, f'{yaml_location}/labels')

        assert all(isinstance(label, str) or isinstance(label, dict) for label in labels), \
            f"{instruction_name}: labels under {yaml_location} were not defined properly. The list of labels has to either be a list of " \
            f"label names, or there can be a parameter 'positive_class' defined under the label name, for example:\n" \
            f"labels: # one label with no positive class (T1D) and one with a positive class (CMV)\n" \
            f"- T1D\n" \
            f"- CMV: # when defining a positive class, make sure to use the correct indentation\n" \
            f"    positive_class: True\n" \

        assert all(len(list(label.keys())) == 1 and isinstance(list(label.values())[0], dict) and 'positive_class' in list(label.values())[0]
                   and len(list(list(label.values())[0].keys())) == 1 for label in [l for l in labels if isinstance(l, dict)]), \
            f"{instruction_name}: The only legal parameter under a label name is 'positive_class'. If 'positive_class' is not specified, please " \
            f"remove the colon after the label name. "



[docs]
    @staticmethod
    def create_label_config(labels: list, dataset: Dataset, instruction_name: str, yaml_location: str) -> LabelConfiguration:
        LabelHelper.check_label_format(labels, instruction_name, yaml_location)

        label_config = LabelConfiguration()
        for label in labels:
            label_name = label if isinstance(label, str) else list(label.keys())[0]
            positive_class = label[label_name]['positive_class'] if isinstance(label, dict) else None
            if dataset.labels is not None and label_name in dataset.labels:
                label_values = list(dataset.labels[label_name])
            elif hasattr(dataset, "get_metadata"):
                label_values = list(set(dataset.get_metadata([label_name])[label_name]))
            else:
                label_values = []
                logging.warning(f"{instruction_name}: for {yaml_location}, label values could not be recovered for label "
                              f"{label}, using empty list instead. This issue may occur due to improper loading of dataset {dataset.name},"
                              f"and could cause problems with some encodings.")

            label_config.add_label(label_name, label_values, positive_class=positive_class)

        return label_config