Source code for immuneML.dsl.import_parsers.ImportParser

from pathlib import Path
from typing import Tuple

from immuneML.IO.dataset_import.DataImport import DataImport
from immuneML.IO.dataset_import.IReceptorImport import IReceptorImport
from immuneML.data_model.dataset.Dataset import Dataset
from immuneML.data_model.receptor.ChainPair import ChainPair
from immuneML.dsl.DefaultParamsLoader import DefaultParamsLoader
from immuneML.dsl.symbol_table.SymbolTable import SymbolTable
from immuneML.dsl.symbol_table.SymbolType import SymbolType
from immuneML.util.Logger import log, print_log
from immuneML.util.ParameterValidator import ParameterValidator
from immuneML.util.ReflectionHandler import ReflectionHandler


[docs] class ImportParser: keyword = "datasets" valid_keys = ["format", "params"]
[docs] @staticmethod def parse(workflow_specification: dict, symbol_table: SymbolTable, result_path: Path) -> Tuple[SymbolTable, dict]: assert ImportParser.keyword in workflow_specification and isinstance(workflow_specification[ImportParser.keyword], dict), \ "ImmuneMLParser: datasets are not defined." for key in workflow_specification[ImportParser.keyword].keys(): symbol_table = ImportParser._parse_dataset(key, workflow_specification[ImportParser.keyword][key], symbol_table, result_path) return symbol_table, workflow_specification[ImportParser.keyword]
@staticmethod @log def _parse_dataset(key: str, dataset_specs: dict, symbol_table: SymbolTable, result_path: Path) -> SymbolTable: location = "ImportParser" ParameterValidator.assert_keys(list(dataset_specs.keys()), ImportParser.valid_keys, location, f"datasets:{key}", False) valid_formats = ReflectionHandler.all_nonabstract_subclass_basic_names(DataImport, "Import", "IO/dataset_import/") ParameterValidator.assert_in_valid_list(dataset_specs["format"], valid_formats, location, "format") import_cls = ReflectionHandler.get_class_by_name("{}Import".format(dataset_specs["format"])) params = ImportParser._prepare_params(dataset_specs, result_path, key) if "is_repertoire" in params: ParameterValidator.assert_type_and_value(params["is_repertoire"], bool, location, "is_repertoire") if params["is_repertoire"]: if import_cls != IReceptorImport: assert "metadata_file" in params, f"{location}: Missing parameter: metadata_file under {key}/params/" ParameterValidator.assert_type_and_value(params["metadata_file"], Path, location, "metadata_file") else: assert "paired" in params, f"{location}: Missing parameter: paired under {key}/params/" ParameterValidator.assert_type_and_value(params["paired"], bool, location, "paired") if params["paired"]: assert "receptor_chains" in params, f"{location}: Missing parameter: receptor_chains under {key}/params/" ParameterValidator.assert_in_valid_list(params["receptor_chains"], ["_".join(cp.value) for cp in ChainPair], location, "receptor_chains") try: dataset = import_cls.import_dataset(params, key) dataset.name = key symbol_table.add(key, SymbolType.DATASET, dataset) ImportParser.log_dataset_info(dataset) except KeyError as key_error: raise KeyError(f"{key_error}\n\nAn error occurred during parsing of dataset {key}. " f"The keyword {key_error.args[0]} was missing. This either means this argument was " f"not defined under definitions/datasets/{key}/params, or this column was missing from " f"an input data file. ") except Exception as ex: raise Exception(f"{ex}\n\nAn error occurred while parsing the dataset {key}. See the log above for more details.") return symbol_table @staticmethod def _prepare_params(dataset_specs: dict, result_path: Path, dataset_name: str): params = DefaultParamsLoader.load(ImportParser.keyword, dataset_specs["format"]) if "params" in dataset_specs.keys(): params = {**params, **dataset_specs["params"]} if "result_path" not in params or params["result_path"] is None: params["result_path"] = Path(result_path) / "datasets" / dataset_name else: params["result_path"] = Path(params["result_path"]) if "path" in params: params["path"] = Path(params["path"]) if "metadata_file" in params: params["metadata_file"] = Path(params["metadata_file"]) dataset_specs["params"] = params return params
[docs] @staticmethod def log_dataset_info(dataset: Dataset): print_log(f"Imported {dataset.__class__.__name__.split('Dataset')[0].lower()} dataset {dataset.name} with " f"{dataset.get_example_count()} examples.", True)