Source code for immuneML.util.PositionHelper

import logging

from immuneML.data_model.receptor.RegionType import RegionType
from immuneML.data_model.receptor.receptor_sequence.ReceptorSequence import ReceptorSequence
from immuneML.environment.SequenceType import SequenceType


class PositionHelper:
    """
    Static helpers that translate CDR3 / junction sequence lengths into lists of IMGT position labels and
    that build per-position weights used when choosing where to implant a motif instance.
    """

    # longest and shortest CDR3 lengths for which position labels are generated
    MAX_CDR3_LEN = 91
    MIN_CDR3_LEN = 5
    # CDR3 length at which positions 105-117 are all present; longer sequences get 111.x / 112.x insertions
    MIDPOINT_CDR3_LEN = 13

    @staticmethod
    def gen_imgt_positions_from_cdr3_length(input_length: int) -> list:
        """
        Generates the IMGT position labels for a CDR3 sequence of the given length.

        :param input_length: length of the CDR3 sequence
        :return: list of position labels as strings (e.g. '105', '111.1'), one per sequence element; an empty list
            (after logging a warning) if the length is outside [MIN_CDR3_LEN, MAX_CDR3_LEN]
        """
        if PositionHelper.MIN_CDR3_LEN <= input_length <= PositionHelper.MIDPOINT_CDR3_LEN:
            # short sequences: 105-107 and 116-117 are always present, the remaining elements are split between
            # the positions growing up from 108 and the positions growing down from 115 (the odd one goes right)
            extra_count = input_length - PositionHelper.MIN_CDR3_LEN
            pos_left_count = extra_count // 2
            pos_right_count = extra_count - pos_left_count

            return (['105', '106', '107']
                    + [str(i) for i in range(108, 108 + pos_left_count)]
                    + [str(i) for i in range(116 - pos_right_count, 116)]
                    + ['116', '117'])

        if PositionHelper.MIDPOINT_CDR3_LEN < input_length <= PositionHelper.MAX_CDR3_LEN:
            # long sequences: all of 105-117 are present and the surplus is placed between 111 and 112 as
            # 111.1, 111.2, ... followed by ..., 112.2, 112.1 (the odd one goes to the 112.x side)
            extra_count = input_length - PositionHelper.MIDPOINT_CDR3_LEN
            pos111_count = extra_count // 2
            pos112_count = extra_count - pos111_count

            return ([str(pos) for pos in range(105, 112)]
                    + [f'111.{i}' for i in range(1, pos111_count + 1)]
                    + [f'112.{i}' for i in range(pos112_count, 0, -1)]
                    + [str(pos) for pos in range(112, 118)])

        logging.warning(f"IMGT positions could not be generated for CDR3 sequence of length {input_length}.")
        return []

    @staticmethod
    def gen_imgt_positions_from_junction_length(input_length: int) -> list:
        """
        Generates the IMGT position labels for a junction sequence of the given length: the CDR3 positions for
        `input_length - 2` elements, preceded by '104' and followed by '118'.

        :param input_length: length of the junction sequence
        :return: list of position labels as strings; an empty list (after logging a warning) if the length is
            outside [MIN_CDR3_LEN + 2, MAX_CDR3_LEN + 2]
        """
        if PositionHelper.MIN_CDR3_LEN + 2 <= input_length <= PositionHelper.MAX_CDR3_LEN + 2:
            return ['104'] + PositionHelper.gen_imgt_positions_from_cdr3_length(input_length - 2) + ['118']

        logging.warning(
            f"IMGT positions could not be generated for IMGT junction sequence of length {input_length}.")
        return []

    @staticmethod
    def gen_imgt_positions_from_sequence(sequence: ReceptorSequence, sequence_type=SequenceType.AMINO_ACID):
        """
        Generates the IMGT position labels for a receptor sequence, using its "region_type" attribute and the
        length of the value returned by `get_sequence()`.

        :param sequence: receptor sequence to generate the positions for
        :param sequence_type: type of the sequence; only SequenceType.AMINO_ACID is supported
        :return: list of position labels as produced by gen_imgt_positions_from_length
        :raises NotImplementedError: if sequence_type is anything other than SequenceType.AMINO_ACID
        """
        # compare against the enum class itself: reading AMINO_ACID through the argument relies on
        # member-through-member access and fails with AttributeError for non-enum arguments
        if sequence_type != SequenceType.AMINO_ACID:
            raise NotImplementedError(f"PositionHelper: IMGT positions are not implemented for sequence type {sequence_type}")

        region_type = sequence.get_attribute("region_type")
        input_length = len(sequence.get_sequence())

        return PositionHelper.gen_imgt_positions_from_length(input_length, region_type)

    @staticmethod
    def gen_imgt_positions_from_length(input_length: int, region_type: RegionType):
        """
        Dispatches to the CDR3 or junction position generator depending on the region type.

        :param input_length: length of the sequence
        :param region_type: RegionType.IMGT_CDR3 or RegionType.IMGT_JUNCTION
        :return: list of position labels as strings (empty if the length is not supported)
        :raises NotImplementedError: for any other region type
        """
        if region_type == RegionType.IMGT_CDR3:
            return PositionHelper.gen_imgt_positions_from_cdr3_length(input_length)
        elif region_type == RegionType.IMGT_JUNCTION:
            return PositionHelper.gen_imgt_positions_from_junction_length(input_length)
        else:
            raise NotImplementedError(f"PositionHelper: IMGT positions are not implemented for region type {region_type}")

    @staticmethod
    def adjust_position_weights(sequence_position_weights: dict, imgt_positions, limit: int) -> dict:
        """
        :param sequence_position_weights: weights supplied by the user as to where in the receptor_sequence to implant
        :param imgt_positions: IMGT positions present in the specific receptor_sequence
        :param limit: how far from the end of the receptor_sequence the motif at latest must start in order not
            to elongate the receptor_sequence
        :return: position_weights for implanting a motif instance into a receptor_sequence: a dict with one entry
            per IMGT position, normalized to sum to 1; if none of the usable positions has a non-zero weight,
            the weight is spread uniformly over the positions that respect the limit
        """
        index_limit = len(imgt_positions) - limit

        # keep the user-supplied weight only for positions that are present in the receptor_sequence and lie
        # before the limit; every other position gets 0
        position_weights = {position: sequence_position_weights.get(position, 0.0) if index < index_limit else 0.0
                            for index, position in enumerate(imgt_positions)}

        weights_sum = sum(position_weights[key] for key in sequence_position_weights if key in position_weights)

        if weights_sum != 0:
            # normalize weights
            return {position: float(weight) / float(weights_sum) for position, weight in position_weights.items()}

        # no usable weight was supplied: fall back to a uniform choice, but only among the positions that respect
        # the limit, consistent with build_position_weights when no weights are given
        allowed_positions = {position for index, position in enumerate(imgt_positions) if index < index_limit}
        if not allowed_positions:
            # no position satisfies the limit: keep a valid distribution over all positions
            allowed_positions = set(position_weights)

        return {position: 1 / len(allowed_positions) if position in allowed_positions else 0.0
                for position in position_weights}

    @staticmethod
    def build_position_weights(sequence_position_weights: dict, imgt_positions, limit: int) -> dict:
        """
        Builds the weights over IMGT positions used for choosing where to implant a motif instance.

        :param sequence_position_weights: weights supplied by the user, or None if no weights are defined
        :param imgt_positions: IMGT positions present in the specific receptor_sequence
        :param limit: number of positions at the end of the receptor_sequence that must not be chosen
        :return: dict with one weight per IMGT position; if no weights are defined, the positions before the
            limit get equal weights, the remaining ones get 0, and a warning is logged
        """
        if sequence_position_weights is not None:
            return PositionHelper.adjust_position_weights(sequence_position_weights, imgt_positions, limit)

        valid_position_count = len(imgt_positions) - limit
        position_weights = {position: 1.0 / valid_position_count if index < valid_position_count else 0
                            for index, position in enumerate(imgt_positions)}
        logging.warning('Position weights are not defined. Randomly choosing position to implant motif_instance instead.')

        return position_weights