Source code for immuneML.data_model.datasets.Dataset
import abc
from dataclasses import dataclass, field
from pathlib import Path
from typing import List
from immuneML.data_model.EncodedData import EncodedData
[docs]
@dataclass
class Dataset:
    """Base class holding the metadata shared by dataset types.

    The dataclass fields are ``identifier``, ``name``, ``encoded_data``,
    ``labels`` (a fresh dict per instance) and ``dataset_file``. All of them
    are optional at construction time; ``name`` falls back to ``identifier``
    when it is not given.

    The methods below other than ``__post_init__`` are stubs that subclasses
    are expected to override; in this class each of them does nothing and
    returns ``None``.

    NOTE: the methods are marked with ``abc.abstractmethod``, but this class
    does not use ``abc.ABCMeta`` as its metaclass, so Python does not enforce
    the markers: ``Dataset`` and incomplete subclasses can still be
    instantiated.
    """

    # Plain class attributes (no annotation), so they are not dataclass fields.
    # Presumably the values accepted as `dataset_type` in make_subset()
    # -- TODO confirm against callers.
    TRAIN = "train"
    TEST = "test"
    SUBSAMPLED = "subsampled"
    PREPROCESSED = "preprocessed"

    identifier: str = None
    name: str = None
    encoded_data: EncodedData = None
    # default_factory gives every instance its own dict instead of a shared one
    labels: dict = field(default_factory=dict)
    dataset_file: Path = None

    def __post_init__(self):
        """Use the identifier as the name when no name was supplied."""
        if self.name is not None:
            return
        self.name = self.identifier

    @classmethod
    @abc.abstractmethod
    def build_from_objects(cls, **kwargs):
        """Alternative-constructor stub; accepted keyword arguments are defined by subclasses."""

    @abc.abstractmethod
    def make_subset(self, example_indices, path, dataset_type: str):
        """Subset stub taking example indices, a path and a dataset type string.

        Presumably builds a new dataset from the selected examples and stores
        it under ``path`` -- confirm against the subclass implementations.
        """

    @abc.abstractmethod
    def get_example_count(self):
        """Stub; presumably returns the number of examples in the dataset (see subclasses)."""

    @abc.abstractmethod
    def get_data(self, batch_size: int = 1):
        """Stub for accessing the examples; ``batch_size`` defaults to 1.

        How ``batch_size`` is used is decided by the subclass implementation.
        """

    @abc.abstractmethod
    def get_example_ids(self):
        """Stub; presumably returns the identifiers of the examples (see subclasses)."""

    @abc.abstractmethod
    def get_label_names(self):
        """Stub; presumably returns the names of the available labels (see subclasses)."""

    @abc.abstractmethod
    def clone(self, keep_identifier: bool = False):
        """Copy stub; ``keep_identifier`` defaults to ``False``.

        Presumably the flag controls whether the copy reuses this dataset's
        identifier -- confirm against the subclass implementations.
        """

    @abc.abstractmethod
    def get_data_from_index_range(self, start_index: int, end_index: int):
        """Stub for retrieving examples between two integer indices.

        Whether ``end_index`` is inclusive is not defined here -- check the
        subclass implementations.
        """