# Source code for immuneML.reports.data_reports.GLIPH2Exporter

from pathlib import Path

import pandas as pd

from immuneML.data_model.datasets.ElementDataset import ReceptorDataset
from immuneML.reports.ReportOutput import ReportOutput
from immuneML.reports.ReportResult import ReportResult
from immuneML.reports.data_reports.DataReport import DataReport
from immuneML.util.PathBuilder import PathBuilder



# [docs]
class GLIPH2Exporter(DataReport):
    """
    Report which exports the receptor data to GLIPH2 format so that it can be directly used in GLIPH2 tool. Currently, the report accepts only
    receptor datasets.

    GLIPH2 publication: Huang H, Wang C, Rubelt F, Scriba TJ, Davis MM. Analyzing the Mycobacterium tuberculosis immune response by T-cell receptor
    clustering with GLIPH2 and genome-wide antigen screening. Nature Biotechnology. Published online April 27,
    2020:1-9. `doi:10.1038/s41587-020-0505-4 <https://www.nature.com/articles/s41587-020-0505-4>`_

    **Specification arguments:**

    - condition (str): name of the parameter present in the receptor metadata in the dataset; condition can be anything which can be processed in
      GLIPH2, such as tissue type or treatment.

    **YAML specification:**

    .. indent with spaces
    .. code-block:: yaml

        definitions:
            reports:
                my_gliph2_exporter:
                    GLIPH2Exporter:
                        condition: epitope # for instance, epitope parameter is present in receptors' metadata with values such as "MtbLys" for Mycobacterium tuberculosis (as shown in the original paper).

    """

    @classmethod
    def build_object(cls, **kwargs):
        """Create the report from keyword arguments (same names as the ``__init__`` parameters)."""
        return cls(**kwargs)

    def __init__(self, dataset: ReceptorDataset = None, result_path: Path = None, name: str = None, condition: str = None, number_of_processes: int = 1):
        """
        Store the export settings.

        ``dataset``, ``result_path``, ``number_of_processes`` and ``name`` are forwarded to ``DataReport``;
        ``condition`` is the receptor metadata key whose value is written to the "subject:condition" column.
        """
        super().__init__(dataset=dataset, result_path=result_path, number_of_processes=number_of_processes, name=name)
        self.condition = condition

    @staticmethod
    def _subject_id(metadata, default: str):
        """
        Return the subject id stored in the receptor metadata, or ``default`` if it is absent or None.

        The metadata is indexed like a mapping when the condition is read, and ``getattr`` on a dict never
        sees its keys, so a dict is queried by key; any other metadata object is queried by attribute.
        """
        if isinstance(metadata, dict):
            subject = metadata.get("subject_id")
        else:
            subject = getattr(metadata, "subject_id", None)
        return default if subject is None else subject

    def _generate(self) -> ReportResult:
        """
        Write one row per receptor to ``exported_data.tsv`` in ``result_path`` and return the report result.

        Columns, in order: CDR3b, TRBV, TRBJ (beta chain amino acid sequence, V call, J call), CDR3a (alpha
        chain amino acid sequence), "subject:condition" and count (beta chain duplicate count, 1 if missing).
        The subject part falls back to the receptor's position in the dataset when no subject id is available.
        The condition is read with ``receptor.metadata[self.condition]``, so a lookup error propagates if it
        is not present.
        """
        PathBuilder.build(self.result_path)

        # insertion order of this dict defines the column order of the exported file
        columns = {"CDR3b": [], "TRBV": [], "TRBJ": [], "CDR3a": [], "subject:condition": [], "count": []}

        for index, receptor in enumerate(self.dataset.get_data()):
            beta = receptor.beta
            subject = self._subject_id(receptor.metadata, str(index))

            columns["CDR3b"].append(beta.sequence_aa)
            columns["TRBV"].append(beta.v_call)
            columns["TRBJ"].append(beta.j_call)
            columns["CDR3a"].append(receptor.alpha.sequence_aa)
            columns["subject:condition"].append(f"{subject}:{receptor.metadata[self.condition]}")
            columns["count"].append(beta.duplicate_count if beta.duplicate_count is not None else 1)

        file_path = self.result_path / "exported_data.tsv"
        pd.DataFrame(columns).to_csv(file_path, sep="\t", index=False)

        return ReportResult(self.name,
                            info="Report which exports the receptor data to GLIPH2 format so that it can be directly "
                                 "used in GLIPH2 tool.",
                            output_tables=[ReportOutput(file_path, "exported data in GLIPH2 format")])

    def check_prerequisites(self):
        """Return True only if the dataset is a ``ReceptorDataset``; other dataset types are not supported."""
        return isinstance(self.dataset, ReceptorDataset)