import datetime
from pathlib import Path
from typing import List
from immuneML.IO.dataset_export.DataExporter import DataExporter
from immuneML.data_model.dataset.Dataset import Dataset
from immuneML.util.ReflectionHandler import ReflectionHandler
from immuneML.workflows.instructions.Instruction import Instruction
from immuneML.workflows.instructions.dataset_generation.DatasetExportState import DatasetExportState
from scripts.specification_util import update_docs_per_mapping
class DatasetExportInstruction(Instruction):
    """
    DatasetExport instruction takes a list of datasets as input and outputs them in specified formats.

    Arguments:

        datasets (list): a list of datasets to export in all given formats

        export_formats (list): a list of formats in which to export the datasets. Valid formats are class names of any non-abstract class inheriting :py:obj:`~immuneML.IO.dataset_export.DataExporter.DataExporter`. Important note: Pickle files might not be compatible between different immuneML (sub)versions.

    YAML specification:

    .. indent with spaces
    .. code-block:: yaml

        my_dataset_export_instruction: # user-defined instruction name
            type: DatasetExport # which instruction to execute
            datasets: # list of datasets to export
                - my_generated_dataset
                - my_dataset_from_adaptive
            export_formats: # list of formats to export the datasets to
                - AIRR
                - Pickle

    """

    # Suffix removed from an exporter class name to obtain the format name
    # used in output paths and in the returned state.
    _EXPORTER_SUFFIX = "Exporter"

    def __init__(self, datasets: List[Dataset], exporters: List[DataExporter], result_path: Path = None, name: str = None):
        """
        Store the configuration of the export; nothing is exported until run() is called.

        Args:
            datasets: datasets to export
            exporters: exporter classes to apply to every dataset; run() reads ``__name__`` from each
                entry and calls ``export`` on it directly, so classes (not instances) are expected
            result_path: stored as-is; overwritten by run()
            name: name of the instruction; used by run() as a directory name, so it has to be set before run() is called
        """
        self.datasets = datasets
        self.exporters = exporters
        self.result_path = result_path
        self.name = name

    @staticmethod
    def _get_export_format(exporter) -> str:
        """Return the exporter's class name without the trailing "Exporter"; names without that suffix are returned unchanged."""
        class_name = exporter.__name__
        suffix = DatasetExportInstruction._EXPORTER_SUFFIX
        if class_name.endswith(suffix):
            return class_name[:-len(suffix)]
        return class_name

    def run(self, result_path: Path) -> DatasetExportState:
        """
        Export every dataset with every exporter to ``result_path / <instruction name> / <dataset name> / <format>``.

        Args:
            result_path: parent directory; the instruction name is appended to it and the result is stored in self.result_path

        Returns:
            DatasetExportState holding the datasets, the list of format names, a nested dict
            ``{dataset name: {format name: export path}}``, the result path and the instruction name
        """
        # self.name is used as a path component here: it must not be None at this point
        self.result_path = result_path / self.name

        # format names depend only on the exporters, so they are computed once and reused for the state below
        export_formats = [self._get_export_format(exporter) for exporter in self.exporters]
        paths = {}

        for dataset in self.datasets:
            # fall back to the identifier for datasets without a name
            dataset_name = dataset.name if dataset.name is not None else dataset.identifier
            # label for the log message derived from the dataset class name: "Dataset" is replaced by "s" and the result lowercased
            contains = str(dataset.__class__.__name__).replace("Dataset", "s").lower()
            paths[dataset_name] = {}

            for exporter, export_format in zip(self.exporters, export_formats):
                path = self.result_path / dataset_name / export_format
                exporter.export(dataset, path)
                paths[dataset_name][export_format] = path
                print(f"{datetime.datetime.now()}: Exported dataset {dataset_name} containing {dataset.get_example_count()} {contains} in {export_format} format.", flush=True)

        return DatasetExportState(datasets=self.datasets, formats=export_formats,
                                  paths=paths, result_path=self.result_path, name=self.name)

    @staticmethod
    def get_documentation():
        """
        Return the class docstring with the generic description of valid formats replaced by the
        list of names obtained from ReflectionHandler for the non-abstract DataExporter subclasses.
        """
        doc = str(DatasetExportInstruction.__doc__)

        valid_strategy_values = ReflectionHandler.all_nonabstract_subclass_basic_names(DataExporter, "Exporter", "dataset_export/")
        # turn the list's string form into a comma-separated sequence of backtick-quoted names:
        # [1:-1] drops the surrounding square brackets, then single quotes become backticks
        valid_strategy_values = str(valid_strategy_values)[1:-1].replace("'", "`")

        # the key has to match the sentence in the class docstring exactly, otherwise nothing is replaced
        mapping = {
            "Valid formats are class names of any non-abstract class inheriting "
            ":py:obj:`~immuneML.IO.dataset_export.DataExporter.DataExporter`.": f"Valid values are: {valid_strategy_values}."
        }
        doc = update_docs_per_mapping(doc, mapping)
        return doc