class DimensionalityReduction(EncodingReport):
    """
    This report visualizes the data obtained by dimensionality reduction.

    **Specification arguments:**

    - label (str): name of the label to use for highlighting data points; or None

    - dim_red_method (str): name of the dimensionality reduction method defined under ml_methods that will be used to
      transform the data for plotting; if None, it will visualize the encoded data of reduced dimensionality if set

    **YAML specification:**

    .. indent with spaces
    .. code-block:: yaml

        definitions:
            reports:
                rep1:
                    DimensionalityReduction:
                        label: epitope
                        dim_red_method:
                            PCA:
                                n_components: 2

    """

    def __init__(self, dataset: Dataset = None, batch_size: int = 1, result_path: Path = None, name: str = None,
                 label: str = None, dim_red_method: DimRedMethod = None):
        """
        Store the report configuration and build the human-readable info text.

        Args:
            dataset: dataset whose encoded data will be visualized.
            batch_size: accepted for interface compatibility; not used by this class's own code.
            result_path: directory where the CSV table and the HTML figure are written.
            name: name of the report, forwarded to the parent class.
            label: name of the dataset attribute used to color the points; None disables coloring.
            dim_red_method: method object exposing ``fit_transform(dataset)``; if None, the
                dimensionality-reduced data already stored on the encoded dataset is used.
        """
        super().__init__(dataset=dataset, result_path=result_path, name=name)
        self._label = label
        self._dim_red_method = dim_red_method

        # Only mention the method name when a method was actually supplied; otherwise the text
        # would read "(NoneType)".
        info = "This report visualizes the encoded data after applying dimensionality reduction"
        if self._dim_red_method is not None:
            info += f" ({self._dim_red_method.__class__.__name__})"
        self.info = info + "."

    def _generate(self) -> ReportResult:
        """
        Obtain the 2D representation of the data, export it as CSV and plot it.

        Returns:
            ReportResult listing the exported table and, if plotting succeeded, the figure.

        Raises:
            AssertionError: if the data is not encoded (when a method is supplied), if no
                dimensionality-reduced data is stored (when no method is supplied), or if the
                resulting data does not have exactly two columns.
        """
        if self._dim_red_method:
            assert self.dataset.encoded_data.examples is not None, \
                f"{DimensionalityReduction.__name__}: data not encoded, report will not be made."
            dim_reduced_data = self._dim_red_method.fit_transform(self.dataset)
        else:
            dim_reduced_data = self.dataset.encoded_data.dimensionality_reduced_data
            assert dim_reduced_data is not None, \
                (f"{DimensionalityReduction.__name__}: no dimensionality reduction method was given and the "
                 f"encoded data holds no dimensionality-reduced data.")

        assert dim_reduced_data.shape[1] == 2, \
            (f"{DimensionalityReduction.__name__}: expected 2 dimensions for plotting, "
             f"got {dim_reduced_data.shape[1]}.")

        # Look the label up only when one was configured, so that a report without a label does not
        # query the dataset with None or log a misleading warning.
        data_labels = None
        if self._label:
            try:
                data_labels = self.dataset.get_attribute(self._label).tolist()
            except (AttributeError, TypeError):
                logging.warning(f"Label {self._label} not found in the dataset. Skipping label coloring in the plot.")

        PathBuilder.build(self.result_path)

        df = pd.DataFrame({'example_id': self.dataset.get_example_ids(),
                           "x": dim_reduced_data[:, 0],
                           'y': dim_reduced_data[:, 1]})

        # Add the label column only when the values were retrieved; _plot colors by it only if present.
        if data_labels is not None:
            df[self._label] = data_labels

        table_path = self.result_path / 'dimensionality_reduced_data.csv'
        df.to_csv(table_path, index=False)

        report_output_fig = self._safe_plot(df=df, output_written=True)
        output_figures = None if report_output_fig is None else [report_output_fig]

        return ReportResult(name=self.name, info=self.info, output_figures=output_figures,
                            output_tables=[ReportOutput(table_path, 'data after dimensionality reduction')])

    def _plot(self, df: pd.DataFrame) -> ReportOutput:
        """
        Draw a scatter plot of the two reduced dimensions and store it as an HTML file.

        Args:
            df: table with columns ``x`` and ``y`` and, optionally, a column named after the label.

        Returns:
            ReportOutput pointing to the written HTML figure.
        """
        # Color by label only if the label column made it into the table (see _generate).
        color = self._label if self._label and self._label in df.columns else None

        figure = px.scatter(df, x="x", y="y", color=color)
        figure.update_layout(template="plotly_white")

        PathBuilder.build(self.result_path)
        file_path = self.result_path / "dimensionality_reduction.html"
        figure.write_html(str(file_path))

        return ReportOutput(path=file_path, name="Data visualization after dimensionality reduction")