Source code for immuneML.workflows.instructions.clustering.clustering_run_model

from dataclasses import dataclass
from pathlib import Path

import numpy as np
import pandas as pd

from immuneML.data_model.datasets.Dataset import Dataset
from immuneML.encodings.DatasetEncoder import DatasetEncoder
from immuneML.ml_methods.clustering.ClusteringMethod import ClusteringMethod
from immuneML.ml_methods.dim_reduction.DimRedMethod import DimRedMethod


class DataFrameWrapper:
    """File-backed holder for a pandas DataFrame that is loaded on demand.

    When a DataFrame is supplied and nothing exists at ``path`` yet, the frame
    is written there as CSV (without the index). When no DataFrame is supplied,
    the CSV at ``path`` is read the first time :meth:`get_df` is called and the
    result is kept on the instance for later calls.

    Args:
        path: location of the CSV file. A ``pathlib.Path`` is stored as given;
            a ``str`` or other path-like value is converted to ``Path``.
            ``None`` is tolerated and means the frame only lives in memory.
        df: optional DataFrame to wrap (and to persist if the file is missing).
    """

    def __init__(self, path: Path, df: pd.DataFrame = None):
        # Normalise once so that `.exists()` / `.parent` below also work for
        # callers that pass a plain string instead of a Path object.
        if path is not None and not isinstance(path, Path):
            path = Path(path)

        self.path = path
        self.df = df

        # An already existing file is deliberately left untouched (no overwrite).
        if df is not None and path is not None and not path.exists():
            path.parent.mkdir(parents=True, exist_ok=True)
            df.to_csv(str(path), index=False)

    def get_df(self):
        """Return the wrapped DataFrame, reading it from ``path`` if necessary.

        Returns:
            The in-memory DataFrame if there is one; otherwise the frame read
            from the CSV at ``path`` (which is then cached on the instance);
            ``None`` if neither an in-memory frame nor the file is available.
        """
        if self.df is None and self.path is not None and self.path.exists():
            self.df = pd.read_csv(str(self.path))
        return self.df
@dataclass
class ClusteringSetting:
    """Bundle of an encoder, a clustering method and an optional dimensionality reduction step.

    The three ``*_name`` fields are what :meth:`get_key` (and therefore
    ``str(setting)``) is built from. The dimensionality-reduction fields and
    ``path`` default to ``None``.
    """

    # encoder: object, its parameters and its name (the name is used in the key)
    encoder: DatasetEncoder
    encoder_params: dict
    encoder_name: str
    # clustering method: object, its parameters and its name (the name is used in the key)
    clustering_method: ClusteringMethod
    clustering_params: dict
    clustering_method_name: str
    # optional dimensionality reduction; dim_red_name is only part of the key when truthy
    dim_reduction_method: DimRedMethod = None
    dim_red_params: dict = None
    dim_red_name: str = None
    # presumably the location where outputs for this setting are stored -- confirm with callers
    path: Path = None

    def get_key(self) -> str:
        """Return ``<encoder>[_<dim_red>]_<clustering_method>`` built from the name fields.

        The dimensionality-reduction part is left out when ``dim_red_name`` is
        ``None`` or empty.
        """
        dim_red_part = f"_{self.dim_red_name}" if self.dim_red_name else ""
        return self.encoder_name + dim_red_part + f"_{self.clustering_method_name}"

    def __str__(self):
        # The string form of a setting is its key.
        return self.get_key()
@dataclass
class ClusteringItem:
    """Container for the objects belonging to a single clustering run.

    Every field is optional and defaults to ``None``, so an item can be created
    empty and filled in step by step.
    """

    # NOTE(review): the field descriptions below are inferred from names and
    # type annotations only -- confirm against the code that fills them in.
    dataset: Dataset = None  # presumably the dataset that was clustered
    method: ClusteringMethod = None  # presumably the clustering method that was run
    encoder: DatasetEncoder = None  # presumably the encoder applied to the dataset
    internal_performance: DataFrameWrapper = None  # file-backed table of internal metrics (assumed)
    external_performance: DataFrameWrapper = None  # file-backed table of external metrics (assumed)
    predictions: np.ndarray = None  # presumably the predicted cluster assignments
    cl_setting: ClusteringSetting = None  # the setting this item was produced with (assumed)