Toggle Light / Dark / Auto color theme
Toggle table of contents sidebar
Source code for immuneML.reports.data_reports.LabelDist
from pathlib import Path
import math
import pandas as pd
from plotly.subplots import make_subplots
import plotly.graph_objects as go
import plotly.express as px
from immuneML.data_model.datasets.Dataset import Dataset
from immuneML.reports.PlotlyUtil import PlotlyUtil
from immuneML.reports.ReportOutput import ReportOutput
from immuneML.reports.ReportResult import ReportResult
from immuneML.reports.data_reports.DataReport import DataReport
from immuneML.util.ParameterValidator import ParameterValidator
from immuneML.util.PathBuilder import PathBuilder
[docs]
class LabelDist ( DataReport ):
"""
LabelDist report plots the distribution of label values for all labels provided as
input to the report.
Specification arguments:
- labels (list): list of label names as they appear in the metadata file (RepertoireDataset)
or in data files (Receptor/SequenceDataset).
YAML specification:
.. code-block: yaml
reports:
label_count_report:
LabelCount:
labels: ['diagnosis', 'age_group', 'batch']
"""
def __init__ ( self , dataset : Dataset = None , result_path : Path = None , name : str = None ,
number_of_processes : int = 1 , labels : list = None ):
super () . __init__ ( dataset = dataset , result_path = result_path , name = name , number_of_processes = number_of_processes )
self . labels = labels
[docs]
@classmethod
def build_object ( cls , ** kwargs ):
ParameterValidator . assert_keys ( list ( kwargs . keys ()), [ "labels" , 'name' ], "LabelDist report" , "LabelCount" )
ParameterValidator . assert_all_type_and_value ( kwargs [ "labels" ], str , "LabelDist" , "labels" )
return cls ( name = kwargs [ "name" ], labels = kwargs [ "labels" ])
def _generate ( self ) -> ReportResult :
df = self . dataset . get_metadata ( self . labels , return_df = True )
n_cols = 2
n_rows = math . ceil ( len ( df . columns ) / n_cols )
fig = make_subplots ( rows = n_rows , cols = n_cols , subplot_titles = df . columns )
colors = px . colors . qualitative . Vivid * math . ceil ( len ( df . columns ) / len ( px . colors . qualitative . Vivid ))
for i , col in enumerate ( df . columns ):
row = i // n_cols + 1
col_pos = i % n_cols + 1
color = colors [ i ]
if pd . api . types . is_numeric_dtype ( df [ col ]):
trace = go . Histogram ( x = df [ col ], name = col , marker_color = color )
else :
counts = df [ col ] . value_counts ()
trace = go . Bar ( x = counts . index . astype ( str ), y = counts . values , name = col , marker_color = color )
fig . add_trace ( trace , row = row , col = col_pos )
fig . update_layout ( template = 'plotly_white' , height = 300 * n_rows , showlegend = False ,
title_text = "Label distributions" )
path = PathBuilder . build ( self . result_path ) / f " { self . name } _label_distributions.html"
PlotlyUtil . write_image_to_file ( fig , path , df . shape [ 0 ])
return ReportResult ( name = self . name , info = 'Label distributions' ,
output_figures = [ ReportOutput ( name = 'Label distributions' , path = path )])