Source code for immuneML.simulation.sequence_implanting.GappedMotifImplanting

import copy

import numpy as np

from immuneML.data_model.receptor.receptor_sequence.ReceptorSequence import ReceptorSequence
from immuneML.data_model.receptor.receptor_sequence.SequenceAnnotation import SequenceAnnotation
from immuneML.environment.EnvironmentSettings import EnvironmentSettings
from immuneML.simulation.implants.ImplantAnnotation import ImplantAnnotation
from immuneML.simulation.implants.MotifInstance import MotifInstance
from immuneML.simulation.sequence_implanting.SequenceImplantingStrategy import SequenceImplantingStrategy
from immuneML.util.PositionHelper import PositionHelper


[docs] class GappedMotifImplanting(SequenceImplantingStrategy):
[docs] def implant(self, sequence: ReceptorSequence, signal: dict, sequence_position_weights=None) -> ReceptorSequence: motif_instance = signal["motif_instance"] imgt_positions = self._build_imgt_positions(sequence, motif_instance) limit = len(motif_instance.instance) - motif_instance.instance.count("/") + motif_instance.gap - 1 position_weights = PositionHelper.build_position_weights(sequence_position_weights, imgt_positions, limit) implant_position = self._choose_implant_position(imgt_positions, position_weights) new_sequence = self._build_new_sequence(sequence, implant_position, signal) return new_sequence
def _build_imgt_positions(self, sequence: ReceptorSequence, motif_instance: MotifInstance): assert len(sequence.get_sequence()) >= motif_instance.gap + len(motif_instance.instance), \ ("The motif instance is longer than receptor_sequence length. Remove the receptor_sequence from the " "repertoire or reduce max gap length to be able to proceed. ") return PositionHelper.gen_imgt_positions_from_sequence(sequence) def _choose_implant_position(self, imgt_positions, position_weights): imgt_implant_position = np.random.choice(list(position_weights.keys()), size=1, p=list(position_weights.values())) position = np.where(imgt_positions == imgt_implant_position)[0][0] return position def _build_new_sequence(self, sequence: ReceptorSequence, position, signal: dict) -> ReceptorSequence: gap_length = signal["motif_instance"].gap if "/" in signal["motif_instance"].instance: motif_left, motif_right = signal["motif_instance"].instance.split("/") else: motif_left = signal["motif_instance"].instance motif_right = "" gap_start = position+len(motif_left) gap_end = gap_start+gap_length part1 = sequence.get_sequence()[:position] part2 = sequence.get_sequence()[gap_start:gap_end] part3 = sequence.get_sequence()[gap_end+len(motif_right):] new_sequence_string = part1 + motif_left + part2 + motif_right + part3 annotation = SequenceAnnotation() implant = ImplantAnnotation(signal_id=signal["signal_id"], motif_id=signal["motif_id"], motif_instance=str(signal["motif_instance"]), position=position) annotation.add_implant(implant) new_sequence = ReceptorSequence() new_sequence.set_annotation(annotation) new_sequence.set_metadata(copy.deepcopy(sequence.metadata)) new_sequence.set_sequence(new_sequence_string, EnvironmentSettings.get_sequence_type()) return new_sequence