Source code for immuneML.util.PositionHelper

import logging
import math

from immuneML.data_model.receptor.receptor_sequence.ReceptorSequence import ReceptorSequence


[docs]class PositionHelper:
[docs]    @staticmethod
    def gen_imgt_positions_from_length(input_length: int):
        start = 105
        end = 117
        imgt_range = list(range(start, end + 1))
        length = input_length if input_length < 14 else 13
        imgt_positions = imgt_range[:math.ceil(length / 2)] + imgt_range[-math.floor(length / 2):]
        if input_length > 13:
            len_insert = input_length - 13
            insert_left = [111 + 0.001 * i for i in range(1, math.floor(len_insert / 2) + 1)]
            insert_right = [112 + 0.001 * i for i in range(1, math.ceil(len_insert / 2) + 1)]
            insert = insert_left + list(reversed(insert_right))
            imgt_positions[math.ceil(len(imgt_range) / 2):math.ceil(len(imgt_range) / 2)] = insert
        return imgt_positions

[docs]    @staticmethod
    def gen_imgt_positions_from_sequence(sequence: ReceptorSequence):
        input_length = len(sequence.get_sequence())
        return PositionHelper.gen_imgt_positions_from_length(input_length)

[docs]    @staticmethod
    def adjust_position_weights(sequence_position_weights: dict, imgt_positions, limit: int) -> dict:
        """
        :param sequence_position_weights: weights supplied by the user as to where in the receptor_sequence to implant
        :param imgt_positions: IMGT positions present in the specific receptor_sequence
        :param limit: how far from the end of the receptor_sequence the motif at latest must start
                        in order not to elongate the receptor_sequence
        :return: position_weights for implanting a motif instance into a receptor_sequence
        """
        # filter only position weights where there are imgt positions in the receptor_sequence and 0 if this imgt position is
        # not in the sequence_position_weights
        index_limit = len(imgt_positions) - limit

        position_weights = {int(imgt_positions[k]): sequence_position_weights[imgt_positions[k]]
                            if imgt_positions[k] in sequence_position_weights.keys() and k < index_limit else 0.0 for k
                            in range(len(imgt_positions))}
        weights_sum = sum([position_weights[k] for k in sequence_position_weights.keys() if k in position_weights])
        # normalize weights
        if weights_sum != 0:
            position_weights = {int(k): float(position_weights[k]) / float(weights_sum) for k in position_weights.keys()}
        else:
            position_weights = {int(k): 1 / len(position_weights.keys()) for k in position_weights}
        return position_weights

[docs]    @staticmethod
    def build_position_weights(sequence_position_weights: dict, imgt_positions, limit: int) -> dict:
        if sequence_position_weights is not None:
            position_weights = PositionHelper.adjust_position_weights(sequence_position_weights, imgt_positions, limit)
        else:
            valid_position_count = len(imgt_positions) - limit
            position_weights = {imgt_positions[i]: 1.0 / valid_position_count if i < valid_position_count else 0
                                for i in range(len(imgt_positions))}
            logging.warning('Position weights are not defined. Randomly choosing position to implant motif_instance instead.')

        return position_weights