from pathlib import Path
from immuneML.data_model.dataset.Dataset import Dataset
from immuneML.reports.Report import Report
class DataReport(Report):
    '''
    Data reports show some type of features or statistics about a given dataset.

    When running the :ref:`TrainMLModel` instruction, data reports can be specified inside the 'selection' or 'assessment'
    specification under the keys 'reports/data' (current cross-validation split) or 'reports/data_splits' (train/test sub-splits).
    Example:

    .. indent with spaces
    .. code-block:: yaml

        my_instruction:
            type: TrainMLModel
            selection:
                reports:
                    data:
                        - my_data_report
                # other parameters...
            assessment:
                reports:
                    data:
                        - my_data_report
                # other parameters...
            # other parameters...

    Alternatively, when running the :ref:`ExploratoryAnalysis` instruction, data reports can be specified under 'report'. Example:

    .. indent with spaces
    .. code-block:: yaml

        my_instruction:
            type: ExploratoryAnalysis
            analyses:
                my_first_analysis:
                    report: my_data_report
                    # other parameters...
            # other parameters...

    '''

    def __init__(self, dataset: Dataset = None, result_path: Path = None, name: str = None,
                 number_of_processes: int = 1) -> None:
        '''
        The arguments defined below are set at runtime by the instruction.
        Concrete classes inheriting DataReport may include additional parameters that will be set by the user in the form of input arguments.

        Arguments:

            dataset (Dataset): a dataset object (can be repertoire, receptor or sequence dataset, depending on the specific report)

            result_path (Path): location where the results (plots, tables, etc.) will be stored

            name (str): user-defined name of the report used in the HTML overview automatically generated by the platform

            number_of_processes (int): how many processes should be created at once to speed up the analysis. For personal machines, 4 or 8 is usually a good choice.

        '''
        # name and number_of_processes are handled by the Report base class;
        # only the data-report-specific state is stored here.
        super().__init__(name=name, number_of_processes=number_of_processes)
        self.dataset = dataset
        self.result_path = result_path

    @staticmethod
    def get_title() -> str:
        '''Return the title string used for this family of reports.'''
        return "Data reports"