Source code for immuneML.ml_methods.TCRdistClassifier

import copy

import numpy as np
from sklearn.neighbors import KNeighborsClassifier

from immuneML.ml_methods.SklearnMethod import SklearnMethod
from immuneML.util.ParameterValidator import ParameterValidator


[docs]class TCRdistClassifier(SklearnMethod): """ Implementation of a nearest neighbors classifier based on TCR distances as presented in Dash P, Fiore-Gartland AJ, Hertz T, et al. Quantifiable predictive features define epitope-specific T cell receptor repertoires. Nature. 2017; 547(7661):89-93. `doi:10.1038/nature22383 <https://www.nature.com/articles/nature22383>`_. This method is implemented using scikit-learn's KNeighborsClassifier with k determined at runtime from the training dataset size and weights linearly scaled to decrease with the distance of examples. Arguments: percentage (float): percentage of nearest neighbors to consider when determining receptor specificity based on known receptors (between 0 and 1) show_warnings (bool): whether to show warnings generated by scikit-learn, by default this is True. YAML specification: .. indent with spaces .. code-block:: yaml my_tcr_method: TCRdistClassifier: percentage: 0.1 show_warnings: True """ def __init__(self, percentage: float, show_warnings: bool = True): super().__init__() ParameterValidator.assert_type_and_value(percentage, float, "TCRdistClassifier", "percentage", min_inclusive=0., max_inclusive=1.) self.percentage = percentage self.k = None self.show_warnings = show_warnings def _get_ml_model(self, cores_for_training: int = 2, X=None): # compute k (number of nearest neighbors to consider) given the training dataset size (10% in the paper) self.k = int(X.shape[0] * self.percentage) # define function for computing weights which linearly decrease with distance def weights_func(distances): for point_dist_i, point_dist in enumerate(distances): if hasattr(point_dist, '__contains__') and 0. in point_dist: distances[point_dist_i] = point_dist == 0. else: distances[point_dist_i] = point_dist / np.sum(point_dist).astype(float) # make an object of KNN class with precomputed metric return KNeighborsClassifier(n_neighbors=self.k, weights=weights_func, metric='precomputed', n_jobs=cores_for_training)
[docs] def get_params(self): return {**self.model.get_params(deep=True), **copy.deepcopy(vars(self))}
[docs] def can_predict_proba(self) -> bool: return True
def _get_model_filename(self): return "tcrdist_classifier"