# Source code for immuneML.data_model.datasets.Dataset

import abc
from dataclasses import dataclass, field
from pathlib import Path
from typing import List

from immuneML.data_model.EncodedData import EncodedData


@dataclass
class Dataset:
    """Common base for dataset classes: shared fields plus an abstract method interface.

    Every field defaults to ``None`` except ``labels``, which defaults to a
    fresh empty dict per instance. After initialisation, ``name`` falls back
    to ``identifier`` when no name was supplied.

    NOTE(review): the methods below are marked with ``abc.abstractmethod``, but
    this class does not derive from ``abc.ABC``, so the abstract markers are not
    enforced at instantiation time. Every method body here is a bare ``pass``
    and returns ``None``; the real behaviour is expected to come from subclasses.
    """

    identifier: str = None
    name: str = None
    encoded_data: EncodedData = None
    labels: dict = field(default_factory=dict)
    dataset_file: Path = None

    # Unannotated on purpose: these are plain class-level constants, not
    # dataclass fields, so they do not become __init__ parameters.
    # Presumably used as values for the `dataset_type` argument of
    # make_subset() -- confirm against callers.
    TRAIN = "train"
    TEST = "test"
    SUBSAMPLED = "subsampled"
    PREPROCESSED = "preprocessed"

    def __post_init__(self):
        """Use the identifier as the name when no explicit name was given."""
        if self.name is None:
            self.name = self.identifier

    @classmethod
    @abc.abstractmethod
    def build_from_objects(cls, **kwargs):
        """Abstract alternate constructor; accepted keyword arguments are defined by subclasses."""
        pass

    @classmethod
    @abc.abstractmethod
    def create_metadata_dict(cls, **kwargs):
        """Abstract hook; presumably builds a metadata dictionary -- confirm against subclasses."""
        pass

    @abc.abstractmethod
    def make_subset(self, example_indices, path, dataset_type: str):
        """Abstract hook; presumably creates a dataset restricted to ``example_indices``.

        The meaning of ``path`` and ``dataset_type`` is not defined here --
        confirm against subclasses.
        """
        pass

    @abc.abstractmethod
    def get_example_count(self):
        """Abstract hook; presumably returns the number of examples -- confirm against subclasses."""
        pass

    @abc.abstractmethod
    def get_data(self, batch_size: int = 1):
        """Abstract hook; presumably provides the examples, ``batch_size`` at a time -- confirm against subclasses."""
        pass

    @abc.abstractmethod
    def get_example_ids(self):
        """Abstract hook; presumably returns the identifiers of the examples -- confirm against subclasses."""
        pass

    @abc.abstractmethod
    def get_label_names(self):
        """Abstract hook; presumably returns the names of the available labels -- confirm against subclasses."""
        pass

    @abc.abstractmethod
    def clone(self, keep_identifier: bool = False):
        """Abstract hook; presumably returns a copy of the dataset.

        ``keep_identifier`` presumably controls whether the copy reuses this
        dataset's identifier -- confirm against subclasses.
        """
        pass

    @abc.abstractmethod
    def get_metadata(self, field_names: list, return_df: bool = False):
        """Abstract hook; presumably returns metadata for ``field_names``.

        ``return_df`` presumably switches the return type to a data frame --
        confirm against subclasses.
        """
        pass

    @abc.abstractmethod
    def get_data_from_index_range(self, start_index: int, end_index: int):
        """Abstract hook; presumably returns the examples between the two indices.

        Whether ``end_index`` is inclusive is not defined here -- confirm
        against subclasses.
        """
        pass