Source code for immuneML.preprocessing.Preprocessor

import abc
from pathlib import Path

from immuneML.data_model.datasets.RepertoireDataset import RepertoireDataset



[docs]
class Preprocessor(metaclass=abc.ABCMeta):
    """
    Abstract base class for preprocessing steps.

    Concrete preprocessings must implement process_dataset(). The base class additionally provides a dataset type
    check (check_dataset_type) and a flag describing whether the number of examples is preserved
    (keeps_example_count).
    """

    def __init__(self, result_path: Path = None):
        """
        Stores the given result path on the instance.

        Args:
            result_path: path kept as self.result_path; defaults to None (presumably the location where the
                preprocessing writes its output - confirm against the subclasses)
        """
        self.result_path = result_path

    @abc.abstractmethod
    def process_dataset(self, dataset: RepertoireDataset, result_path: Path, number_of_processes: int = 1) -> RepertoireDataset:
        """
        Applies the preprocessing to the dataset; has to be implemented by every subclass.

        Args:
            dataset: the dataset to preprocess
            result_path: presumably the location for the results of the preprocessing - confirm against the subclasses
            number_of_processes: presumably how many processes may be used for the preprocessing; defaults to 1

        Returns:
            a dataset (annotated as RepertoireDataset)
        """
        pass

    def check_dataset_type(self, dataset, valid_dataset_types: list, location: str):
        """
        Checks that the exact type of the dataset is one of the valid dataset types.

        The exact type is compared (type(dataset) has to be an element of valid_dataset_types), so instances of
        subclasses of a valid type are not accepted unless the subclass itself is listed.

        Args:
            dataset: the object whose type is checked
            valid_dataset_types: classes that this preprocessing accepts
            location: prefix of the error message (e.g., the name of the preprocessing performing the check)

        Raises:
            AssertionError: if the type of the dataset is not in valid_dataset_types
        """
        dataset_type = type(dataset)
        if dataset_type in valid_dataset_types:
            return

        valid_names = ', '.join(valid_type.__name__ for valid_type in valid_dataset_types)

        # raised explicitly instead of using an assert statement: assert statements are removed when Python runs
        # with -O, which would silently disable this check; the exception type is the same as before
        raise AssertionError(f"{location}: this preprocessing can only be applied to datasets of type: "
                             f"{valid_names}. "
                             f"Your dataset is a {dataset_type.__name__}. "
                             f"Please use a different preprocessing, or omit the preprocessing for this dataset.")

    def keeps_example_count(self) -> bool:
        """
        Defines if the preprocessing can be run with TrainMLModel instruction; to be able to run with it, the preprocessing cannot change the
        number of examples in the dataset

        Returns:
            True in this base implementation
        """
        return True