Source code for immuneML.dsl.instruction_parsers.DatasetExportParser

from pathlib import Path

from immuneML.IO.dataset_export.DataExporter import DataExporter
from immuneML.dsl.symbol_table.SymbolTable import SymbolTable
from immuneML.dsl.symbol_table.SymbolType import SymbolType
from immuneML.util.ParameterValidator import ParameterValidator
from immuneML.util.ReflectionHandler import ReflectionHandler
from immuneML.workflows.instructions.dataset_generation.DatasetExportInstruction import DatasetExportInstruction


class DatasetExportParser:
    """
    Parser for instructions of type ``DatasetExport``.

    Example specification that defines a random dataset and a preprocessing
    sequence, and exports the dataset in two formats:

    .. code-block:: yaml

        definitions:
          datasets:
            my_generated_dataset: # a dataset to be exported in the given format
              format: RandomRepertoireDataset
              params:
                result_path: generated_dataset/
                repertoire_count: 100
                sequence_count_probabilities:
                  100: 0.5
                  120: 0.5
                sequence_length_probabilities:
                  12: 0.333
                  13: 0.333
                  14: 0.333
                labels:
                  immune_event_1:
                    yes: 0.5
                    no: 0.5
          preprocessing_sequences:
            my_preprocessing:
              - my_filter:
                  ClonesPerRepertoireFilter:
                    lower_limit: 110
                    upper_limit: 200
        instructions:
          my_instruction1: # instruction name
            type: DatasetExport
            datasets: # list of datasets to export
              - my_generated_dataset
            preprocessing_sequence: my_preprocessing
            number_of_processes: 4
            export_formats: # list of formats to export the datasets to
              - AIRR
              - ImmuneML

    """

    # Keys every DatasetExport instruction has to provide.
    REQUIRED_KEYS = ["type", "datasets", "export_formats"]
    # Keys that may be omitted; parse() substitutes a default for each of them.
    OPTIONAL_KEYS = ["preprocessing_sequence", "number_of_processes"]

    def parse(self, key: str, instruction: dict, symbol_table: SymbolTable, path: Path = None) -> DatasetExportInstruction:
        """
        Validate one instruction specification and build the instruction object.

        Args:
            key: name of the instruction; passed to the validators and used as
                the ``name`` of the returned instruction.
            instruction: the instruction specification (keys listed in
                REQUIRED_KEYS and OPTIONAL_KEYS).
            symbol_table: table used to look up the referenced datasets and,
                when given, the preprocessing sequence.
            path: not used by this parser.

        Returns:
            A DatasetExportInstruction holding the looked-up datasets, the
            exporter classes, the number of processes (1 when not specified)
            and the preprocessing sequence (None when not specified).

        Validation is delegated to ParameterValidator; whatever it raises on
        invalid input propagates to the caller.
        """
        location = "DatasetExportParser"

        # Key checks: the instruction's keys against all accepted keys, then
        # the presence of the required ones.
        accepted_keys = DatasetExportParser.REQUIRED_KEYS + DatasetExportParser.OPTIONAL_KEYS
        ParameterValidator.assert_keys(list(instruction), accepted_keys, location, key, False)
        ParameterValidator.assert_keys_present(list(instruction), DatasetExportParser.REQUIRED_KEYS, location, key)

        # Export formats are checked against the names ReflectionHandler
        # reports for the non-abstract DataExporter subclasses.
        valid_formats = ReflectionHandler.all_nonabstract_subclass_basic_names(DataExporter, "Exporter", 'dataset_export/')
        ParameterValidator.assert_all_in_valid_list(instruction["export_formats"], valid_formats, location, "export_formats")

        # Only datasets registered in the symbol table may be exported.
        known_datasets = symbol_table.get_keys_by_type(SymbolType.DATASET)
        ParameterValidator.assert_all_in_valid_list(instruction["datasets"], known_datasets, location, "datasets")

        if "number_of_processes" in instruction:
            # presumably the trailing 1 is the smallest accepted value -- confirm against ParameterValidator
            ParameterValidator.assert_type_and_value(instruction["number_of_processes"], int, location,
                                                     "number_of_processes", 1)

        # Resolve the pieces in the same order the constructor receives them.
        datasets = [symbol_table.get(dataset_key) for dataset_key in instruction["datasets"]]
        exporters = [ReflectionHandler.get_class_by_name(f"{format_name}Exporter", "dataset_export/")
                     for format_name in instruction["export_formats"]]
        number_of_processes = instruction.get("number_of_processes", 1)

        if "preprocessing_sequence" in instruction:
            preprocessing_sequence = symbol_table.get(instruction["preprocessing_sequence"])
        else:
            preprocessing_sequence = None

        return DatasetExportInstruction(datasets=datasets,
                                        exporters=exporters,
                                        number_of_processes=number_of_processes,
                                        preprocessing_sequence=preprocessing_sequence,
                                        name=key)