# Source code for immuneML.util.PositionHelper

import logging
import math

from immuneML.data_model.receptor.receptor_sequence.ReceptorSequence import ReceptorSequence


class PositionHelper:
    """Static helpers that generate IMGT position lists and per-position weights for motif implanting."""

    @staticmethod
    def gen_imgt_positions_from_length(input_length: int):
        """
        Generate the list of IMGT positions for a sequence of the given length.

        Positions are drawn from the range 105..117 (13 positions). Shorter sequences take
        ceil(length / 2) positions from the start of that range and floor(length / 2) from its end.
        Longer sequences get the full range plus insertion positions placed between 111 and 112,
        encoded as 111.001, 111.002, ... followed by ..., 112.002, 112.001.

        :param input_length: number of positions to generate; must not be negative
        :return: list with exactly input_length positions (ints, plus floats for insertions)
        :raises ValueError: if input_length is negative
        """
        if input_length < 0:
            raise ValueError(f"input_length must be non-negative, got {input_length}.")

        start = 105
        end = 117
        imgt_range = list(range(start, end + 1))

        core_length = min(input_length, len(imgt_range))
        head_count = math.ceil(core_length / 2)
        tail_count = math.floor(core_length / 2)

        # The tail is sliced from an explicit start index: with tail_count == 0 a negative-index
        # slice (imgt_range[-0:]) would return the whole range instead of an empty list.
        imgt_positions = imgt_range[:head_count] + imgt_range[len(imgt_range) - tail_count:]

        if input_length > len(imgt_range):
            len_insert = input_length - len(imgt_range)
            insert_left = [111 + 0.001 * i for i in range(1, math.floor(len_insert / 2) + 1)]
            insert_right = [112 + 0.001 * i for i in range(1, math.ceil(len_insert / 2) + 1)]
            # the right-hand insertions count down towards 112, so they are added in reverse
            middle = math.ceil(len(imgt_range) / 2)
            imgt_positions[middle:middle] = insert_left + insert_right[::-1]

        return imgt_positions

    @staticmethod
    def gen_imgt_positions_from_sequence(sequence: "ReceptorSequence"):
        """
        Generate IMGT positions for a sequence object based on the length of sequence.get_sequence().

        :param sequence: object exposing get_sequence(), whose result supports len()
        :return: list of IMGT positions, see gen_imgt_positions_from_length
        """
        input_length = len(sequence.get_sequence())
        return PositionHelper.gen_imgt_positions_from_length(input_length)

    @staticmethod
    def adjust_position_weights(sequence_position_weights: dict, imgt_positions, limit: int) -> dict:
        """
        :param sequence_position_weights: weights supplied by the user as to where in the receptor_sequence to implant
        :param imgt_positions: IMGT positions present in the specific receptor_sequence
        :param limit: how far from the end of the receptor_sequence the motif at latest must start in order not to
            elongate the receptor_sequence
        :return: position_weights for implanting a motif instance into a receptor_sequence: a dict keyed by
            int(IMGT position) whose values sum to 1 (uniform over all keys if every usable weight is 0)
        """
        # keep only the weights of IMGT positions that occur in the receptor_sequence and lie before the limit;
        # every other position present in the receptor_sequence gets weight 0
        index_limit = len(imgt_positions) - limit
        position_weights = {}
        for index, position in enumerate(imgt_positions):
            usable = position in sequence_position_weights and index < index_limit
            # keys are truncated to int, so an insertion position (e.g. 111.001) shares the key of its
            # integer position and, coming later in the list, overrides it
            position_weights[int(position)] = sequence_position_weights[position] if usable else 0.0

        # Sum the weights actually stored: looking them up through the user's original keys would skip
        # weights supplied for fractional positions, because those are stored under their int key.
        weights_sum = sum(position_weights.values())

        # normalize weights
        if weights_sum != 0:
            return {position: float(weight) / float(weights_sum) for position, weight in position_weights.items()}

        # no usable weight at all: fall back to a uniform distribution over the positions
        return {position: 1 / len(position_weights) for position in position_weights}

    @staticmethod
    def build_position_weights(sequence_position_weights: dict, imgt_positions, limit: int) -> dict:
        """
        Build the weights used to choose where a motif instance is implanted.

        :param sequence_position_weights: user-supplied weights per IMGT position, or None
        :param imgt_positions: IMGT positions present in the specific receptor_sequence
        :param limit: number of trailing positions that must not be chosen as implant start
        :return: dict mapping position to weight; if sequence_position_weights is given, the result of
            adjust_position_weights, otherwise a uniform weight over the first len(imgt_positions) - limit
            positions and 0 for the remaining ones (a warning is logged in that case)
        """
        if sequence_position_weights is not None:
            return PositionHelper.adjust_position_weights(sequence_position_weights, imgt_positions, limit)

        valid_position_count = len(imgt_positions) - limit
        position_weights = {position: 1.0 / valid_position_count if index < valid_position_count else 0
                            for index, position in enumerate(imgt_positions)}
        logging.warning('Position weights are not defined. Randomly choosing position to implant motif_instance instead.')
        return position_weights