class FurthestNeighborClassifier:
    """
    Furthest Neighbor Classifier for clustering tasks.

    It predicts the label (cluster) of an example as the label corresponding to the minimal maximum distance
    across all labels in training data: for every cluster, the distance from the example to the furthest
    training member of that cluster is computed, and the cluster with the smallest such distance is returned.

    The metric used for distance computation can be any metric supported by sklearn.metrics.DistanceMetric,
    or precomputed (e.g., when using TCRdistEncoder).
    """

    def __init__(self, metric: str = 'precomputed', **kwargs):
        """
        Args:
            metric: name of the distance metric passed to DistanceMetric.get_metric, or 'precomputed' when the
                inputs to fit/predict are already distance matrices.
            **kwargs: forwarded unchanged to the next ``__init__`` in the MRO.
        """
        super().__init__(**kwargs)
        self.metric = metric
        self.X_train = None
        self.y_train = None
        self.classes_ = None

    def fit(self, X, y):
        """
        Store the training data and the set of distinct cluster labels.

        Args:
            X: training examples; when metric is 'precomputed' this must be a square distance matrix.
            y: cluster label of every training example.

        Returns:
            self, to allow call chaining.

        Raises:
            AssertionError: if metric is 'precomputed' and X is not a square 2-D matrix.
        """
        if self.metric == 'precomputed':
            shape = np.shape(X)
            # Raised explicitly (not via `assert`) so the validation is not stripped under `python -O`;
            # the exception type is kept for callers that already catch AssertionError.
            if len(shape) != 2 or shape[0] != shape[1]:
                raise AssertionError(f"{FurthestNeighborClassifier.__name__}: distance matrix must be square for "
                                     f"precomputed metric, got: {shape}.")

        # State is only updated after validation so a rejected input leaves the instance untouched.
        self.y_train = y
        self.classes_ = np.unique(y)
        self.X_train = X
        return self

    def predict(self, X):
        """
        Assign every example to the cluster whose furthest training member is closest.

        Args:
            X: examples to label. With metric 'precomputed', X is used directly as the distance matrix and its
                columns are selected with the training labels, so column j must hold the distances to the
                j-th training example passed to fit. Otherwise pairwise distances between X and the stored
                training data are computed with the configured metric.

        Returns:
            array with one predicted cluster label per row of X.

        Raises:
            ValueError: if called before fit, or if the configured metric could not be obtained.
        """
        if self.classes_ is None or self.y_train is None:
            raise ValueError(f"{FurthestNeighborClassifier.__name__}: predict() was called before fit().")

        if self.metric != 'precomputed':
            try:
                distance_metric = DistanceMetric.get_metric(self.metric)
            except Exception as e:
                # Chain the original error so the root cause stays visible in the traceback.
                raise ValueError(f"{FurthestNeighborClassifier.__name__}: Metric '{self.metric}' couldn't be "
                                 f"computed. Full error: {e}") from e
            distances = distance_metric.pairwise(X, self.X_train)
        else:
            distances = X

        # Element-wise comparison needs an array; plain Python sequences would not reliably produce a mask.
        labels = np.asarray(self.y_train)

        # One column per cluster: distance from each example to that cluster's furthest training member.
        max_dist_per_cluster = pd.DataFrame({cluster: distances[:, labels == cluster].max(axis=1)
                                             for cluster in self.classes_})

        predictions = max_dist_per_cluster.idxmin(axis=1).values
        return predictions