Source code for immuneML.data_model.encoded_data.EncodedData

import pandas as pd


[docs]class EncodedData: """ When a dataset is encoded, it is stored in an object of EncodedData class. Arguments: examples: a matrix of example_count x feature_count elements (can be a numpy array or a sparse matrix); there are some exceptions to this, for instance, :py:obj:`source.encodings.onehot.OneHotEncoder.OneHotEncoder` where the numpy array has more than two dimensions, but most of the encodings follow the matrix format. feature_names: a list of feature names with feature_count elements feature_annotations: a data frame consisting of annotations for each unique feature example_ids: a list of example (repertoire/sequence/receptor) IDs; it must be the same length as the example_count in the examples matrix labels: a dict of labels where label names are keys and the values are lists of values for the label across examples: {label_name1: [...], label_name2: [...]}. Each list associated with a label has to have values for all examples. """ def __init__(self, examples, labels: dict = None, example_ids: list = None, feature_names: list = None, feature_annotations: pd.DataFrame = None, encoding: str = None, info: dict = None): assert feature_names is None or examples.shape[1] == len(feature_names) if feature_names is not None: assert feature_annotations is None or feature_annotations.shape[0] == len(feature_names) == examples.shape[1] if example_ids is not None and labels is not None: for label in labels.values(): assert len(label) == len(example_ids), "EncodedData: there are {} labels, but {} examples"\ .format(len(label), len(example_ids)) assert examples is None or len(example_ids) == examples.shape[0], "EncodedData: there are {} example ids, but {} examples."\ .format(len(example_ids), examples.shape[0]) if examples is not None: assert all(len(labels[key]) == examples.shape[0] for key in labels.keys()) if labels is not None else True self.examples = examples self.labels = labels self.example_ids = example_ids self.feature_names = feature_names self.feature_annotations = feature_annotations self.encoding = encoding self.info = info