class ClusteringVisualization(ClusteringMethodReport):
    """
    A report that creates low-dimensional visualizations of clustering results using the specified dimensionality
    reduction method. For each dataset and clustering configuration, it creates a scatter plot where points are
    colored by their cluster assignments.

    Specification arguments:

    - dim_red_method (dict): specification of which dimensionality reduction to perform; valid options are
      presented under :ref:`**Dimensionality reduction methods**` and should be specified with the name of the
      method and its parameters, see the example below; if not specified, the report will use any dimensionality
      reduced data present in the dataset's encoded data; if the dataset does not contain dimensionality reduced
      data, and the encoded data has more than 2 dimensions, the report will be skipped.

    YAML specification:

    .. indent with spaces
    .. code-block:: yaml

        reports:
            my_report_with_pca:
                ClusteringVisualization:
                    dim_red_method:
                        PCA:
                            n_components: 2
            my_report_with_tsne:
                ClusteringVisualization:
                    dim_red_method:
                        TSNE:
                            n_components: 2
                            init: pca
            my_report_existing_dim_red:
                ClusteringVisualization:
                    dim_red_method: null

    """

    def __init__(self, dim_red_method: DimRedMethod = None, name: str = None,
                 result_path: Path = None, clustering_item: ClusteringItem = None):
        """
        Store the (optional) dimensionality reduction method and initialise report metadata.

        Args:
            dim_red_method: method used to project the data before plotting; may be None.
            name: report name, forwarded to the parent class.
            result_path: base output directory, forwarded to the parent class.
            clustering_item: clustering item to visualize, forwarded to the parent class.
        """
        super().__init__(name=name, result_path=result_path, clustering_item=clustering_item)
        self.dim_red_method = dim_red_method
        self.result_name = None
        self.desc = "Clustering Visualization"
        # Axis names are known up front only when a method is configured; otherwise they are
        # filled in by _make_plot from the dataset's encoded data.
        self._dimension_names = self.dim_red_method.get_dimension_names() if self.dim_red_method else None

    @classmethod
    def build_object(cls, **kwargs):
        """
        Build the report from specification keyword arguments.

        Reads the optional keys 'name', 'result_path', 'clustering_item' and 'dim_red_method'. A non-empty
        'dim_red_method' is parsed with MLParser using its first key as the method name; otherwise a warning
        is logged and the report is created without a dimensionality reduction method.
        """
        location = "ClusteringVisualization"
        dim_red_spec = kwargs.get("dim_red_method")

        if dim_red_spec:
            method_name = next(iter(dim_red_spec.keys()))
            dim_red_method = MLParser.parse_any_model("dim_red_method", dim_red_spec, method_name)[0]
        else:
            logging.warning(f"{location}: No dimensionality reduction method specified. "
                            "If the encoded dataset includes dimensionality reduction, it will be used.")
            dim_red_method = None

        return cls(
            dim_red_method=dim_red_method,
            name=kwargs.get("name"),
            result_path=kwargs.get("result_path"),
            clustering_item=kwargs.get("clustering_item"),
        )

    def _generate(self) -> ReportResult:
        """
        Create the output directory, produce the plot and wrap it in a ReportResult.

        Returns:
            ReportResult holding a single output figure pointing at the written plot.
        """
        PathBuilder.build(self.result_path)

        # NOTE: without a configured method the class name is 'NoneType', so the directory becomes
        # 'clustering_nonetype_plots'; kept as-is so existing output paths do not change.
        self.result_name = f"clustering_{self.dim_red_method.__class__.__name__.lower()}_plots"
        result_path = PathBuilder.build(self.result_path / self.result_name)

        plot_path = self._make_plot(result_path)
        report_output = ReportOutput(plot_path, f"Clustering visualization for {self.item.cl_setting.get_key()}")

        return ReportResult(f"{self.desc} ({self.name})",
                            info="Visualizations of clustering results",
                            output_figures=[report_output])

    def _select_plot_data(self):
        """
        Pick the data to plot and set self._dimension_names accordingly.

        Order of preference: fit the configured method on the dataset; reuse dimensionality reduced data
        stored in the encoded data; use the encoded examples directly when they have at most two columns.

        Raises:
            ValueError: if none of the three sources is available.
        """
        encoded_data = self.item.dataset.encoded_data

        if self.dim_red_method is not None:
            # Dimension names were already taken from the method in __init__.
            return self.dim_red_method.fit_transform(dataset=self.item.dataset)

        if encoded_data.dimensionality_reduced_data is not None:
            self._dimension_names = encoded_data.dim_names if encoded_data.dim_names else ['dim1', 'dim2']
            return encoded_data.dimensionality_reduced_data

        if encoded_data.examples.shape[1] <= 2:
            self._dimension_names = encoded_data.feature_names
            return encoded_data.get_examples_as_np_matrix()

        raise ValueError("ClusteringVisualization: No dimensionality reduction method specified, and the dataset "
                         "does not contain dimensionality reduced data. Please specify a dimensionality reduction "
                         "method.")

    def _make_plot(self, result_path: Path) -> Path:
        """
        Build the scatter plot of the first two dimensions colored by cluster and write it to disk.

        The plotted data frame (dimensions, cluster label, example id) is stored as CSV next to the figure.

        Args:
            result_path: directory in which the CSV and the figure are written.

        Returns:
            The path returned by PlotlyUtil.write_image_to_file for the written figure.

        Raises:
            ValueError: if no plottable data is available or fewer than two dimension names are known.
        """
        transformed_data = self._select_plot_data()
        dimension_names = self._dimension_names

        # The scatter plot needs an x and a y axis; fail with a clear message instead of an
        # IndexError/TypeError when indexing the names below.
        if dimension_names is None or len(dimension_names) < 2:
            raise ValueError(f"ClusteringVisualization: at least two dimensions are required to make a scatter "
                             f"plot, but the available dimension names are: {dimension_names}.")

        df = pd.DataFrame(transformed_data, columns=dimension_names)
        df['cluster'] = pd.Series(self.item.predictions).astype(str)
        df['id'] = self.item.dataset.get_example_ids()

        fig = px.scatter(df, x=dimension_names[0], y=dimension_names[1], color='cluster',
                         color_discrete_sequence=plotly.colors.qualitative.Set2,
                         category_orders={'cluster': sorted(df.cluster.unique())},
                         hover_data=['id'])
        fig.update_layout(template="plotly_white")

        # Shared stem for the CSV and the figure; the suffix is empty when no method is configured.
        file_stem = f"clustering_visualization_{self.dim_red_method.name if self.dim_red_method else ''}"

        df.to_csv(result_path / f"{file_stem}.csv", index=False)
        plot_path = PlotlyUtil.write_image_to_file(fig, result_path / f"{file_stem}.html", df.shape[0])

        return plot_path