# Source code for immuneML.reports.data_reports.DataReport

from pathlib import Path

from immuneML.data_model.datasets.Dataset import Dataset
from immuneML.reports.Report import Report



class DataReport(Report):
    """
    Data reports show some type of features or statistics about a given dataset.

    When running the :ref:`TrainMLModel` instruction, data reports can be specified inside the 'selection' or 'assessment'
    specification under the keys 'reports/data' (current cross-validation split) or 'reports/data_splits' (train/test sub-splits).
    Example:

    .. indent with spaces
    .. code-block:: yaml

        definitions:
            reports:
                my_data_report: SequenceCountDistribution
        my_instruction:
            type: TrainMLModel
            selection:
                reports:
                    data:
                        - my_data_report
                # other parameters...
            assessment:
                reports:
                    data:
                        - my_data_report
                # other parameters...
            # other parameters...

    Alternatively, when running the :ref:`ExploratoryAnalysis` instruction, data reports can be specified under 'report'. Example:

    .. indent with spaces
    .. code-block:: yaml

        my_instruction:
            type: ExploratoryAnalysis
            analyses:
                my_first_analysis:
                    report: my_data_report
                    # other parameters...
            # other parameters...
    """
    # Section title for this family of reports (presumably consumed by the documentation tooling -- confirm against its users).
    DOCS_TITLE = "Data reports"

    def __init__(self, dataset: Dataset = None, result_path: Path = None, name: str = None, number_of_processes: int = 1):
        """
        Store the dataset to report on and forward the remaining arguments to the parent Report.

        The arguments defined below are set at runtime by the instruction.
        Concrete classes inheriting DataReport may include additional parameters that will be set by the user in the form of input arguments.

        Args:
            dataset (Dataset): a dataset object (can be repertoire, receptor or sequence dataset, depending on the specific report)
            result_path (Path): location where the results (plots, tables, etc.) will be stored
            name (str): user-defined name of the report used in the HTML overview automatically generated by the platform
            number_of_processes (int): how many processes should be created at once to speed up the analysis.
                For personal machines, 4 or 8 is usually a good choice.
        """
        # The parent initializer runs first, so the dataset assignment below always happens after it.
        super().__init__(name=name, result_path=result_path, number_of_processes=number_of_processes)
        self.dataset = dataset