Source code for immuneML.reports.clustering_method_reports.ExternalLabelClusterSummary

from pathlib import Path
from typing import List

import pandas as pd
import plotly.graph_objects as go

from immuneML.reports.PlotlyUtil import PlotlyUtil
from immuneML.reports.ReportOutput import ReportOutput
from immuneML.reports.ReportResult import ReportResult
from immuneML.reports.clustering_method_reports.ClusteringMethodReport import ClusteringMethodReport
from immuneML.util.ParameterValidator import ParameterValidator
from immuneML.util.PathBuilder import PathBuilder
from immuneML.workflows.instructions.clustering.clustering_run_model import ClusteringItem


class ExternalLabelClusterSummary(ClusteringMethodReport):
    """
    This report summarizes the number of examples in a cluster with different values of external labels.
    For each external label, it creates:

    1. A contingency table showing the count of examples for each combination of cluster and label value

    2. A heatmap visualization of these counts

    It can be used in combination with Clustering instruction.

    **Specification arguments:**

    - external_labels (list): the list of metadata columns in the dataset that should be compared against cluster
      assignment

    **YAML specification:**

    .. indent with spaces
    .. code-block:: yaml

        reports:
            my_external_label_cluster_summary:
                ExternalLabelClusterSummary:
                    external_labels: [disease, HLA]

    """

    @classmethod
    def build_object(cls, **kwargs):
        """
        Validate the specification arguments and build the report.

        Only the keys 'external_labels' and 'name' are accepted, and 'external_labels' is
        checked with ParameterValidator.assert_all_type_and_value against str.
        """
        location = ExternalLabelClusterSummary.__name__
        ParameterValidator.assert_keys(list(kwargs.keys()), ['external_labels', 'name'], location, location)
        ParameterValidator.assert_all_type_and_value(kwargs['external_labels'], str, location, 'external_labels')
        return ExternalLabelClusterSummary(**kwargs)

    def __init__(self, external_labels: List[str], name: str = None, item: ClusteringItem = None,
                 result_path: Path = None):
        """
        Args:
            external_labels: metadata columns to compare against the cluster assignment
            name: name of the report, forwarded to the parent class
            item: clustering item, forwarded to the parent class as `clustering_item`
            result_path: base output directory, forwarded to the parent class
        """
        super().__init__(name=name, result_path=result_path, clustering_item=item)
        self.external_labels = external_labels
        self.desc = "External Label Cluster Summary"

    def _generate(self) -> ReportResult:
        """
        Build the report result with one contingency table and one heatmap per external label.

        If no label produced any output, a result carrying only an explanatory message is returned.
        """
        self.result_path = PathBuilder.build(self.result_path / self.name)

        label_outputs = self._build_label_outputs()

        if not label_outputs:
            return ReportResult(
                name=f"{self.desc} ({self.name})",
                info="No results were generated. This could be because no external labels were found in the dataset "
                     "metadata."
            )

        # Tables and figures are separated structurally rather than by searching the display name:
        # the display name embeds the user-supplied label, which may itself contain 'table' or 'heatmap'.
        return ReportResult(
            name=f"{self.desc} ({self.name})",
            info="Summary of cluster assignments versus external labels",
            output_tables=[table for table, _ in label_outputs],
            output_figures=[figure for _, figure in label_outputs]
        )

    def _process_analysis_results(self) -> List[ReportOutput]:
        """Return all outputs as a flat list: for each label, its table followed by its heatmap."""
        return [output for pair in self._build_label_outputs() for output in pair]

    def _build_label_outputs(self) -> List[tuple]:
        """Create a (table output, heatmap output) pair for every external label present in the metadata."""
        import logging

        dataset = self.item.dataset

        # pd.crosstab aligns Series arguments on their index; both sides get a fresh positional index
        # so that examples are paired by position even if the metadata has a non-default index.
        clusters = pd.Series(self.item.predictions, name='cluster').reset_index(drop=True)
        labels = dataset.get_metadata(self.external_labels, return_df=True)

        label_outputs = []
        for label in self.external_labels:
            if label not in labels:
                logging.warning(f"{ExternalLabelClusterSummary.__name__}: external label '{label}' was not found "
                                f"in the dataset metadata and will be skipped.")
                continue

            label_values = pd.Series(labels[label], name=label).reset_index(drop=True)
            contingency_df = pd.crosstab(clusters, label_values)

            label_outputs.append((self._export_contingency_table(label, contingency_df),
                                  self._export_heatmap(label, contingency_df, dataset)))

        return label_outputs

    def _export_contingency_table(self, label: str, contingency_df: pd.DataFrame) -> ReportOutput:
        """Write the contingency table for `label` to a CSV file under the result path."""
        table_path = self.result_path / f"{label}_contingency.csv"
        contingency_df.to_csv(table_path)
        return ReportOutput(
            path=table_path,
            name=f"Contingency table for {label} ({self.item.cl_setting.get_key()})"
        )

    def _export_heatmap(self, label: str, contingency_df: pd.DataFrame, dataset) -> ReportOutput:
        """Plot the contingency table for `label` as a heatmap and store it through PlotlyUtil."""
        fig = go.Figure(data=go.Heatmap(
            z=contingency_df.values,
            x=contingency_df.columns,
            y=contingency_df.index,
            colorscale='Viridis',
            text=contingency_df.values,
            texttemplate='%{text}',
            hovertemplate='count: %{z}<br>cluster: %{y}<br>' + label + ': %{x}<extra></extra>',
            hoverongaps=False
        ))

        fig.update_layout(
            xaxis_title=label,
            yaxis_title='cluster',
            template='plotly_white'
        )

        # Cluster ids and label values are discrete, so both axes are forced to be categorical.
        fig.update_xaxes(type='category')
        fig.update_yaxes(type='category')

        heatmap_path = self.result_path / f"{label}_heatmap.html"
        # The number of table rows is passed as the third argument
        # NOTE(review): its exact meaning is defined by PlotlyUtil.write_image_to_file - confirm there.
        plot_path = PlotlyUtil.write_image_to_file(fig, heatmap_path, contingency_df.shape[0])

        return ReportOutput(
            path=plot_path,
            name=f"Distribution heatmap for {label} with example counts "
                 f"({dataset.get_example_count()} total examples)"
        )

    def check_prerequisites(self):
        """Return True only if a clustering item is set and at least one external label was given."""
        return self.item is not None and bool(self.external_labels)