class MatchedReferenceUtil:
    """
    Shared helper for MatchedSequencesEncoder and MatchedReceptorsEncoder.
    """

    @staticmethod
    def prepare_reference(reference_params: dict, location: str, paired: bool):
        """
        Check a reference import specification and load the reference data it describes.

        Args:
            reference_params: import specification; its keys are checked against
                ["format", "params"] via ParameterValidator.assert_keys. The "params"
                dict must hold a "path" entry pointing to an existing file. Note that
                the "params" dict is updated in place: "is_repertoire" and "paired"
                are filled in when they are absent.
            location: label prefixed to the validation error messages.
            paired: value the "paired" import parameter must have; it also selects the
                wording ("receptors" vs "sequences") of the final messages.

        Returns:
            A list with everything yielded by the imported dataset's get_data().

        Raises:
            AssertionError: if the file does not exist, if "is_repertoire" is given and
                is not False, if "paired" is given and differs from `paired`, or if
                nothing was imported.
        """
        ParameterValidator.assert_keys(list(reference_params.keys()), ["format", "params"], location, "reference")

        seq_import_params = reference_params.get("params", {})
        reference_file = seq_import_params["path"]

        assert os.path.isfile(reference_file), f"{location}: the file {seq_import_params['path']} does not exist. " \
                                               f"Specify the correct path under reference."

        # Fill in the flags when missing; reject explicitly given values that conflict.
        if "is_repertoire" not in seq_import_params:
            seq_import_params["is_repertoire"] = False
        else:
            assert seq_import_params["is_repertoire"] is False, f"{location}: is_repertoire must be False for SequenceImport"

        if "paired" not in seq_import_params:
            seq_import_params["paired"] = paired
        else:
            assert seq_import_params["paired"] == paired, f"{location}: paired must be {paired} for SequenceImport"

        format_str = reference_params["format"]
        import_class = ReflectionHandler.get_class_by_name(f"{format_str}Import")

        # User-supplied import parameters take precedence over the format's defaults.
        default_params = DefaultParamsLoader.load(EnvironmentSettings.default_params_path / "datasets",
                                                  DefaultParamsLoader.convert_to_snake_case(format_str))
        params = {**default_params, **seq_import_params}

        # Imported data goes into an 'iml_imported' folder next to the reference file
        # (or inside the path itself should it not be a regular file).
        path = Path(reference_file)
        if path.is_file():
            import_dir = path.parent / 'iml_imported'
        else:
            import_dir = path / 'iml_imported'
        params['result_path'] = PathBuilder.build(import_dir)

        if format_str == "SingleLineReceptor":
            importer = import_class(params, 'tmp_receptor_dataset')
        else:
            importer = import_class(params=params, dataset_name="tmp_dataset")

        receptors = list(importer.import_dataset().get_data())

        assert len(receptors) > 0, f"MatchedReferenceUtil: The total number of imported reference {'receptors' if paired else 'sequences'} is 0, please ensure that reference import is specified correctly."

        logging.info(f"MatchedReferenceUtil: successfully imported {len(receptors)} reference {'receptors' if paired else 'sequences'}.")

        return receptors