immuneML.util package

Submodules

immuneML.util.AdaptiveImportHelper module

class immuneML.util.AdaptiveImportHelper.AdaptiveImportHelper[source]

Bases: object

static parse_adaptive_germline_to_imgt(dataframe, organism)[source]
static parse_germline(dataframe: pandas.DataFrame, gene_name_replacement: dict, germline_value_replacement: dict)[source]
static preprocess_dataframe(dataframe: pandas.DataFrame, params: DatasetImportParams)[source]
immuneML.util.AdaptiveImportHelper.is_valid_sequence_str(x)[source]
immuneML.util.AdaptiveImportHelper.make_gene_call_from_gene_and_allele(df: pandas.DataFrame, gene: str)[source]
immuneML.util.AdaptiveImportHelper.parse_allele(df: pandas.DataFrame, gene: str)[source]
immuneML.util.AdaptiveImportHelper.parse_gene_column(df: pandas.DataFrame, gene, gene_name_replacement, germline_value_replacement)[source]
immuneML.util.AdaptiveImportHelper.replace_nans_with_empty_str(df: pandas.DataFrame, col: str)[source]
immuneML.util.AdaptiveImportHelper.set_locus_column(df: pandas.DataFrame)[source]

immuneML.util.CompAIRRHelper module

class immuneML.util.CompAIRRHelper.CompAIRRHelper[source]

Bases: object

static check_compairr_path(compairr_path)[source]
static determine_compairr_path(compairr_path)[source]
static get_cmd_args(compairr_params: CompAIRRParams, input_file_list, result_path)[source]
static get_repertoire_contents(repertoire, compairr_params, encoder_params: EncoderParams, export_sequence_id=False)[source]
static process_compairr_output_file(subprocess_result, compairr_params, result_path)[source]
static read_compairr_output_file(output_file)[source]
static verify_compairr_output_path(subprocess_result, compairr_params, result_path)[source]
static write_repertoire_file(repertoire_dataset=None, filename=None, compairr_params: CompAIRRParams = None, repertoires: list = None, export_sequence_id: bool = False)[source]
static write_sequences_file(sequence_dataset, filename, compairr_params, repertoire_id='sequence_dataset')[source]

immuneML.util.CompAIRRParams module

class immuneML.util.CompAIRRParams.CompAIRRParams(compairr_path: pathlib.Path, keep_compairr_input: bool, differences: int, indels: bool, ignore_counts: bool, ignore_genes: bool, threads: int, output_filename: str, log_filename: str, output_pairs: bool, pairs_filename: str, is_cdr3: bool = False, do_repertoire_overlap: bool = True, do_sequence_matching: bool = False)[source]

Bases: object

compairr_path: Path
differences: int
do_repertoire_overlap: bool = True
do_sequence_matching: bool = False
ignore_counts: bool
ignore_genes: bool
indels: bool
is_cdr3: bool = False
keep_compairr_input: bool
log_filename: str
output_filename: str
output_pairs: bool
pairs_filename: str
threads: int

immuneML.util.DistanceMetrics module

immuneML.util.DistanceMetrics.jaccard(vector1, vector2, tmp_vector=None)[source]
immuneML.util.DistanceMetrics.morisita_horn(vector1, vector2, *args, **kwargs)[source]

immuneML.util.DocEnumHelper module

class immuneML.util.DocEnumHelper.DocEnumHelper[source]

Bases: object

static get_enum_names(enum)[source]
static get_enum_names_and_values(enum)[source]

immuneML.util.EncoderHelper module

class immuneML.util.EncoderHelper.EncoderHelper[source]

Bases: object

static build_comparison_data(dataset: RepertoireDataset, params: EncoderParams, comparison_attributes, sequence_batch_size)[source]
static build_comparison_params(dataset, comparison_attributes) → tuple[source]
static check_dataset_type_available_in_mapping(dataset, class_name)[source]
static check_positive_class_labels(label_config: LabelConfiguration, location: str)[source]

Performs checks for Encoders that explicitly predict a positive class. These Encoders can only be trained for a single binary label at a time.

static encode_dataset_labels(dataset: Dataset, label_config: LabelConfiguration, encode_labels: bool = True)[source]

Automatically generates the encoded labels for a Dataset. This contains labels in the following format: {'label_name': ['label_class1', 'label_class2', 'label_class2']}, where the inner list(s) contain the class label for each example in the dataset.

static encode_element_dataset_labels(dataset: ElementDataset, label_config: LabelConfiguration)[source]

Automatically generates the encoded labels for an ElementDataset (= SequenceDataset or ReceptorDataset)

static encode_repertoire_dataset_labels(dataset: RepertoireDataset, label_config: LabelConfiguration)[source]

Automatically generates the encoded labels for a RepertoireDataset

static get_current_dataset(dataset, context)[source]

Retrieves the full dataset (training+validation+test) if present in context; otherwise returns the given dataset.

static get_example_weights_by_identifiers(dataset, example_identifiers)[source]
static get_single_label_name_from_config(label_config: LabelConfiguration, location='EncoderHelper')[source]
static prepare_training_ids(dataset: Dataset, params: EncoderParams)[source]
static sync_encoder_with_cache(cache_params: tuple, encoder_memo_func, encoder, param_names)[source]

immuneML.util.FilenameHandler module

class immuneML.util.FilenameHandler.FilenameHandler[source]

Bases: object

static get_dataset_name(class_name: str)[source]
static get_filename(class_name: str, file_type: str)[source]

Converts the class name to snake case and appends the given file type.

Parameters:
  • class_name (str) – name of the class that will be stored in the file

  • file_type (str) – file extension: pickle, json

Returns:

filename consisting of concatenated class_name in snake case and file type

static get_model_name(class_name: str, file_type: str = 'pickle')[source]

immuneML.util.ImportHelper module

class immuneML.util.ImportHelper.ImportHelper[source]

Bases: object

DATASET_FORMAT = 'yaml'
INVALID_COLNAME_CHARS = [' ', '#', '&', '.']
static add_cdr3_from_junction(df: pandas.DataFrame)[source]
static add_default_fields_for_airr_seq_set(df: pandas.DataFrame)[source]
static drop_empty_sequences(dataframe: pandas.DataFrame, import_empty_aa_sequences: bool, import_empty_nt_sequences: bool, region_type: RegionType) → pandas.DataFrame[source]
static drop_illegal_character_sequences(dataframe: pandas.DataFrame, import_illegal_characters: bool, import_with_stop_codon: bool, region_type: RegionType) → pandas.DataFrame[source]
static extract_locus_from_data(df: pandas.DataFrame, params: DatasetImportParams, dataset_name: str)[source]
static extract_sequence_dataset_params(items=None, params=None) → dict[source]
static filter_illegal_receptors(df: pandas.DataFrame) → pandas.DataFrame[source]
classmethod filter_illegal_sequences(df: pandas.DataFrame, params: DatasetImportParams, location: str)[source]
static get_receptor_filter_sort_kwargs(df: pandas.DataFrame, warning='')[source]
static get_repertoire_filename_from_metadata_row(metadata_row: pandas.Series, params: DatasetImportParams) → Path[source]
static get_sequence_filenames(path: Path, dataset_name: str)[source]
static get_standardized_name(column_name: str) → str[source]
static is_illegal_sequence(sequence, legal_alphabet) → bool[source]
static make_new_metadata_file(repertoires: list, metadata: pandas.DataFrame, result_path: Path, dataset_name: str) → Path[source]
static parse_sequence_dataframe(df: pandas.DataFrame, params: DatasetImportParams, dataset_name: str) → pandas.DataFrame[source]
static standardize_column_names(df)[source]
static standardize_none_values(dataframe: pandas.DataFrame)[source]

immuneML.util.KmerHelper module

class immuneML.util.KmerHelper.KmerHelper[source]

Bases: object

static create_IMGT_gapped_kmers_from_sequence(sequence: ReceptorSequence, sequence_type: SequenceType, k_left: int, max_gap: int, k_right: int = None, min_gap: int = 0, region_type: RegionType = RegionType.IMGT_CDR3)[source]
static create_IMGT_kmers_from_sequence(sequence: ReceptorSequence, k: int, sequence_type: SequenceType, region_type: RegionType = RegionType.IMGT_CDR3)[source]
static create_IMGT_kmers_from_string(sequence: str, k: int, region_type: RegionType)[source]
static create_all_kmers(k: int, alphabet: list)[source]

Creates all possible k-mers given a k-mer length and an alphabet.

Parameters:
  • k (int) – length of k-mer

  • alphabet (list) – list of characters from which to make all possible k-mers

Returns:

alphabetically sorted list of k-mers

static create_gapped_kmers_from_sequence(sequence: ReceptorSequence, sequence_type: SequenceType, k_left: int, max_gap: int, k_right: int = None, min_gap: int = 0)[source]
static create_gapped_kmers_from_string(sequence, k_left: int, max_gap: int, k_right: int = None, min_gap: int = 0)[source]
static create_kmers_from_sequence(sequence: ReceptorSequence, k: int, sequence_type: SequenceType, overlap: bool = True)[source]
static create_kmers_from_string(sequence, k: int, overlap: bool = True)[source]
static create_kmers_within_HD(kmer: str, alphabet: list, distance: int = 1)[source]
static create_sentences_from_repertoire(repertoire: Repertoire, k: int, sequence_type: SequenceType, overlap: bool = True, region_type: RegionType = RegionType.IMGT_CDR3)[source]

immuneML.util.Logger module

immuneML.util.Logger.log(func)[source]
immuneML.util.Logger.print_log(mssg, include_datetime=False, log_func_name='info')[source]

immuneML.util.NameBuilder module

class immuneML.util.NameBuilder.NameBuilder[source]

Bases: object

static build_name_from_dict(dictionary: dict, level=0)[source]

Creates a name from a dictionary which includes all of its parameters and handles nested dictionaries up to a depth of 10 inclusive.

Parameters:
  • dictionary (dict) – dictionary to create the name from

  • level (int) – controls recursion level, user should keep default

Returns:

name (str)

immuneML.util.NumpyHelper module

class immuneML.util.NumpyHelper.NumpyHelper[source]

Bases: object

SIMPLE_TYPES = [<class 'str'>, <class 'int'>, <class 'float'>, <class 'bool'>, <class 'numpy.str_'>, <class 'numpy.int64'>, <class 'numpy.float64'>, <class 'numpy.bool_'>]
static get_numpy_representation(obj)[source]

Converts an object to a representation that can be stored in numpy arrays without pickle enabled; if it is an object or a dict, it will be serialized to a JSON string.

static group_structured_array_by(data, field)[source]
static is_nan_or_empty(value)[source]
static is_simple_type(t)[source]

Returns whether the type t is a string or a number, so that it does not use pickle if serialized.

immuneML.util.ParameterValidator module

class immuneML.util.ParameterValidator.ParameterValidator[source]

Bases: object

static assert_all_in_valid_list(values: list, valid_values: list, location: str, parameter_name: str)[source]
static assert_all_type_and_value(values, parameter_type, location: str, parameter_name: str, min_inclusive=None, max_inclusive=None)[source]
static assert_any_value_present(values: list, expected_values: list, location: str, parameter_name: str)[source]
static assert_in_valid_list(value, valid_values: list, location: str, parameter_name: str)[source]
static assert_keys(keys, valid_keys, location: str, parameter_name: str, exclusive: bool = True)[source]
static assert_keys_present(values: list, expected_values: list, location: str, parameter_name: str)[source]
static assert_region_type(params, location: str = '')[source]
static assert_sequence_type(params, location: str = '')[source]
static assert_type_and_value(value, parameter_type, location: str, parameter_name: str, min_inclusive=None, max_inclusive=None, min_exclusive=None, max_exclusive=None, exact_value=None)[source]
static assert_valid_tabular_file(file_path, location: str, parameter_name: str, sep='\t', expected_columns: list = None)[source]

immuneML.util.PathBuilder module

class immuneML.util.PathBuilder.PathBuilder[source]

Bases: object

static build(path, warn_if_exists=False)[source]
static remove_old_and_build(path)[source]

immuneML.util.PositionHelper module

class immuneML.util.PositionHelper.PositionHelper[source]

Bases: object

MAX_CDR3_LEN = 91
MIDPOINT_CDR3_LEN = 13
MIN_CDR3_LEN = 5
static gen_imgt_positions_from_cdr3_length(input_length: int) → list[source]
static gen_imgt_positions_from_junction_length(input_length: int)[source]
static gen_imgt_positions_from_length(input_length: int, region_type: RegionType)[source]
static gen_imgt_positions_from_sequence(sequence: ReceptorSequence, sequence_type: SequenceType = SequenceType.AMINO_ACID, region_type: RegionType = RegionType.IMGT_CDR3)[source]
static get_allowed_positions_for_annotation(input_length: int, region_type: RegionType, sequence_position_weights: dict)[source]
static get_imgt_position_weights_for_annotation(input_length: int, region_type: RegionType, sequence_position_weights: dict)[source]
static get_imgt_position_weights_for_implanting(aa_input_length: int, region_type: RegionType, sequence_position_weights: dict, limit: int)[source]

immuneML.util.ReadsType module

class immuneML.util.ReadsType.ReadsType(value, names=None, *, module=None, qualname=None, type=None, start=1, boundary=None)[source]

Bases: Enum

ALL = 'all'
UNIQUE = 'unique'

immuneML.util.ReflectionHandler module

class immuneML.util.ReflectionHandler.ReflectionHandler[source]

Bases: object

static all_direct_subclasses(cls, drop_part=None, subdirectory=None)[source]
static all_nonabstract_subclass_basic_names(cls, drop_part: str, subdirectory: str = '')[source]
static all_nonabstract_subclasses(cls, drop_part=None, subdirectory=None)[source]
static all_subclasses(cls)[source]
static discover_classes_by_partial_name(class_name_ending: str, subdirectory: str = '')[source]
static exists(class_name: str, subdirectory: str = '')[source]
static get_class_by_name(class_name: str, subdirectory: str = '')[source]
static get_class_from_path(path, class_name: str = None)[source]

obtain the class reference from the given path

Parameters:
  • path (str or pathlib.Path) – path to file where the class is located

  • class_name (str) – class name to import from the file; if None, it is assumed that the class name is the same as the file name

Returns:

class

static get_classes_by_partial_name(class_name_ending: str, subdirectory: str = '')[source]
static import_function(function: str, module)[source]
static import_module(name: str, package: str = None)[source]
static is_installed(module_name: str) → bool[source]

immuneML.util.RepertoireBuilder module

class immuneML.util.RepertoireBuilder.RepertoireBuilder[source]

Bases: object

Helper class for tests: creates repertoires from a list of lists of sequences and stores them in the given path.

static build(sequences: list, path: Path, labels: dict = None, seq_metadata: list = None, subject_ids: list = None, name: str = 'd1')[source]
static build_dataset(sequences: list, path: Path, labels: dict = None, seq_metadata: list = None, subject_ids: list = None, name: str = 'd1')[source]

immuneML.util.SequenceAnalysisHelper module

class immuneML.util.SequenceAnalysisHelper.SequenceAnalysisHelper[source]

Bases: object

static compute_overlap_matrix(hp_items: List[HPItem])[source]

immuneML.util.SignificantFeaturesHelper module

class immuneML.util.SignificantFeaturesHelper.SignificantFeaturesHelper[source]

Bases: object

static load_sequences(ground_truth_sequences_path: Path)[source]
static parse_parameters(kwargs, location)[source]
static parse_sequences_path(kwargs, field_name, location)[source]

immuneML.util.StringHelper module

class immuneML.util.StringHelper.StringHelper[source]

Bases: object

static camel_case_to_word_string(camel_case_string: str)[source]
static camel_case_to_words(camel_case_string: str)[source]
static pad_sequence_in_the_middle(sequence: str, max_len: int, pad_char: str) → str[source]

immuneML.util.TCRdistHelper module

class immuneML.util.TCRdistHelper.TCRdistHelper[source]

Bases: object

static add_default_allele_to_v_gene(v_gene: str)[source]
static compute_tcr_dist(dataset: ReceptorDataset, label_names: list, cores: int = 1)[source]
static prepare_tcr_dist_dataframe(dataset: ReceptorDataset, label_names: list) → pandas.DataFrame[source]

Module contents