Source code for immuneML.encodings.reference_encoding.MatchedReferenceUtil

import os

from immuneML.IO.dataset_import.DatasetImportParams import DatasetImportParams
from immuneML.dsl.DefaultParamsLoader import DefaultParamsLoader
from immuneML.environment.EnvironmentSettings import EnvironmentSettings
from immuneML.util.ImportHelper import ImportHelper
from immuneML.util.ParameterValidator import ParameterValidator
from immuneML.util.ReflectionHandler import ReflectionHandler


class MatchedReferenceUtil:
    """
    Utility class for MatchedSequencesEncoder and MatchedReceptorsEncoder
    """

    @staticmethod
    def prepare_reference(reference_params: dict, location: str, paired: bool):
        """
        Validate a reference specification and import the reference items it points to.

        Args:
            reference_params: dict describing the reference; it is checked against the keys
                "format" and "params". "format" is the name of the import format (the importer
                class is looked up by the name "<format>Import"), and "params" holds the import
                parameters, which must contain "path" pointing to an existing file. This dict
                (including its nested "params") is not modified.
            location: name of the calling component; used as a prefix in error messages.
            paired: the value the "paired" import parameter must have; it is filled in when
                the parameter is not specified.

        Returns:
            The imported reference items: for the "SingleLineReceptor" format, the result of
            get_data() on the dataset produced by the importer; for every other format, the
            result of ImportHelper.import_items.

        Raises:
            AssertionError: if "path" is missing or is not an existing file, if "is_repertoire"
                is specified and is not False, or if "paired" is specified and differs from
                the `paired` argument.
        """
        # NOTE(review): presumably this rejects unknown/missing keys -- confirm against ParameterValidator
        ParameterValidator.assert_keys(list(reference_params.keys()), ["format", "params"], location, "reference")

        # Work on a shallow copy so that filling in defaults below does not leak into the caller's dict.
        seq_import_params = dict(reference_params.get("params") or {})

        assert "path" in seq_import_params, \
            f"{location}: the path to the reference file is missing. Specify the correct path under reference."
        reference_path = seq_import_params["path"]

        assert os.path.isfile(reference_path), f"{location}: the file {reference_path} does not exist. " \
                                               f"Specify the correct path under reference."

        if "is_repertoire" in seq_import_params:
            # equality (not identity) is kept on purpose so that accepted values do not change
            assert seq_import_params["is_repertoire"] == False, \
                f"{location}: is_repertoire must be False for SequenceImport"  # noqa: E712
        else:
            seq_import_params["is_repertoire"] = False

        if "paired" in seq_import_params:
            assert seq_import_params["paired"] == paired, f"{location}: paired must be {paired} for SequenceImport"
        else:
            seq_import_params["paired"] = paired

        format_str = reference_params["format"]
        import_class = ReflectionHandler.get_class_by_name(f"{format_str}Import")

        # User-supplied parameters take precedence over the defaults loaded for this format.
        default_params = DefaultParamsLoader.load(EnvironmentSettings.default_params_path / "datasets",
                                                  DefaultParamsLoader.convert_to_snake_case(format_str))
        params = {**default_params, **seq_import_params}

        processed_params = DatasetImportParams.build_object(**params)

        if format_str == "SingleLineReceptor":
            # This format is imported as a whole dataset; result_path is set to the reference file path.
            processed_params.result_path = reference_path
            receptors = import_class.import_dataset(processed_params, 'tmp_receptor_dataset').get_data()
        else:
            receptors = ImportHelper.import_items(import_class, reference_path, processed_params)

        return receptors