from pathlib import Path
from typing import Tuple
import numpy as np
import plotly.graph_objects as go
from scipy.stats import beta
from immuneML.data_model.datasets.Dataset import Dataset
from immuneML.hyperparameter_optimization.HPSetting import HPSetting
from immuneML.ml_methods.classifiers.MLMethod import MLMethod
from immuneML.ml_methods.classifiers.ProbabilisticBinaryClassifier import ProbabilisticBinaryClassifier
from immuneML.reports.ReportOutput import ReportOutput
from immuneML.reports.ReportResult import ReportResult
from immuneML.reports.ml_reports.MLReport import MLReport
from immuneML.util.PathBuilder import PathBuilder
class SequenceAssociationLikelihood(MLReport):
    """
    Plots the beta distribution used as a prior for class assignment in ProbabilisticBinaryClassifier. The distribution plotted shows
    the probability that a sequence is associated with a given class for a label.

    **YAML specification:**

    .. indent with spaces
    .. code-block:: yaml

        definitions:
            reports:
                my_sequence_assoc_report: SequenceAssociationLikelihood

    """

    # Probability mass of each beta distribution that the plotted x-range must cover
    # (passed to ``beta.interval`` in ``get_distribution_limits``).
    DISTRIBUTION_PERCENTAGE_TO_SHOW = 0.999

    # Number of x-axis samples at which the two densities are evaluated.
    STEP = 400

    @classmethod
    def build_object(cls, **kwargs):
        """Create a SequenceAssociationLikelihood report from keyword arguments."""
        return SequenceAssociationLikelihood(**kwargs)

    def __init__(self, train_dataset: Dataset = None, test_dataset: Dataset = None,
                 method: MLMethod = None, result_path: Path = None, name: str = None, hp_setting: HPSetting = None,
                 label=None, number_of_processes: int = 1):
        """Forward all arguments to MLReport and reset the output file name."""
        super().__init__(train_dataset=train_dataset, test_dataset=test_dataset, method=method, result_path=result_path,
                         name=name, hp_setting=hp_setting, label=label, number_of_processes=number_of_processes)
        # Base name (without extension) of the HTML file; assigned in _generate().
        self.result_name = None

    def check_prerequisites(self):
        """Return True only if the method is a ProbabilisticBinaryClassifier.

        The report reads ``alpha_0``, ``beta_0``, ``alpha_1`` and ``beta_1`` from the
        method, so it cannot be produced for any other kind of method.
        """
        return isinstance(self.method, ProbabilisticBinaryClassifier)

    def _generate(self) -> ReportResult:
        """Build the result directory, plot both beta densities and wrap the figure in a ReportResult."""
        PathBuilder.build(self.result_path)

        lower_limit, upper_limit = self.get_distribution_limits()
        # _plot() derives the output file name from result_name, so set it first.
        self.result_name = "beta_distribution"

        report_output_fig = self._plot(upper_limit=upper_limit, lower_limit=lower_limit)
        output_figures = [] if report_output_fig is None else [report_output_fig]

        return ReportResult(name=self.name,
                            info="Beta distribution priors - probability that a sequence is disease-associated",
                            output_figures=output_figures)

    def _plot(self, upper_limit, lower_limit):
        """Write an HTML plot of the two beta densities over [lower_limit, upper_limit).

        Args:
            upper_limit: right end of the x-range (exclusive).
            lower_limit: left end of the x-range (inclusive).

        Returns:
            ReportOutput pointing to ``<result_path>/<result_name>.html``.
        """
        # linspace with endpoint=False yields the same nominal grid as
        # arange(lower, upper, (upper - lower) / STEP) but always exactly STEP samples:
        # arange with a floating-point step can return one sample more or fewer because
        # of rounding, and a zero-width interval would give it an invalid zero step.
        beta_distribution_x = np.linspace(lower_limit, upper_limit,
                                          num=SequenceAssociationLikelihood.STEP, endpoint=False)

        negative_pdf = beta.pdf(beta_distribution_x, self.method.alpha_0, self.method.beta_0)
        positive_pdf = beta.pdf(beta_distribution_x, self.method.alpha_1, self.method.beta_1)

        label_name = self.method.get_label_name()

        figure = go.Figure()
        figure.add_trace(go.Scatter(x=beta_distribution_x, y=negative_pdf, mode='lines',
                                    line=dict(color='#E69F00', width=2),
                                    name=f"{label_name} {self.method.class_mapping[0]}"))
        figure.add_trace(go.Scatter(x=beta_distribution_x, y=positive_pdf, mode='lines',
                                    line=dict(color='#0072B2', width=2),
                                    name=f"{label_name} {self.method.class_mapping[1]}"))
        figure.update_layout(template="plotly_white",
                             xaxis_title=f"probability that receptor sequence is {label_name}-associated",
                             yaxis_title="probability density function",
                             xaxis={'tickformat': '.2e'}, yaxis={'tickformat': '.2e'})

        output_path = self.result_path / f"{self.result_name}.html"
        figure.write_html(str(output_path))

        return ReportOutput(output_path)

    def get_distribution_limits(self) -> Tuple[float, float]:
        """Return the x-range that covers both beta distributions.

        For each class, ``beta.interval`` gives the interval holding
        DISTRIBUTION_PERCENTAGE_TO_SHOW of that class's distribution; the result is the
        smallest lower bound and the largest upper bound of the two intervals.

        Returns:
            (lower_limit, upper_limit)
        """
        lower_limit_0, upper_limit_0 = beta.interval(SequenceAssociationLikelihood.DISTRIBUTION_PERCENTAGE_TO_SHOW,
                                                     self.method.alpha_0, self.method.beta_0)
        lower_limit_1, upper_limit_1 = beta.interval(SequenceAssociationLikelihood.DISTRIBUTION_PERCENTAGE_TO_SHOW,
                                                     self.method.alpha_1, self.method.beta_1)

        lower_limit = min(lower_limit_0, lower_limit_1)
        upper_limit = max(upper_limit_0, upper_limit_1)

        return lower_limit, upper_limit