Source code for immuneML.simulation.signal_implanting_strategy.FullSequenceImplanting

import copy
import math
import random
from pathlib import Path
from typing import List

from immuneML.data_model.receptor.receptor_sequence.ReceptorSequence import ReceptorSequence
from immuneML.data_model.receptor.receptor_sequence.SequenceAnnotation import SequenceAnnotation
from immuneML.data_model.receptor.receptor_sequence.SequenceMetadata import SequenceMetadata
from immuneML.data_model.repertoire.Repertoire import Repertoire
from immuneML.simulation.implants.ImplantAnnotation import ImplantAnnotation
from immuneML.simulation.signal_implanting_strategy.SignalImplantingStrategy import SignalImplantingStrategy


class FullSequenceImplanting(SignalImplantingStrategy):
    """
    This class represents a :py:obj:`~immuneML.simulation.signal_implanting_strategy.SignalImplantingStrategy.SignalImplantingStrategy`
    where signals will be implanted in the repertoire by replacing a `repertoire_implanting_rate` fraction of the sequences
    (rounded up to a whole number of sequences) with sequences generated from the motifs of the signal.
    Motifs here cannot include gaps and the motif instances are the full sequences and will be a part of the repertoire.

    Arguments: this signal implanting strategy has no arguments.

    YAML specification:

    .. indent with spaces
    .. code-block:: yaml

        motifs:
            my_motif: # cannot include gaps
                ...

        signals:
            my_signal:
                motifs:
                    - my_motif
                implanting: FullSequence

    """

    def implant_in_repertoire(self, repertoire: Repertoire, repertoire_implanting_rate: float, signal, path: Path):
        """
        Build a new repertoire in which the trailing sequences are replaced by motif instances of the signal.

        Args:
            repertoire: the repertoire whose sequences and metadata are used as the starting point
            repertoire_implanting_rate: fraction of the sequences to replace; the resulting count is rounded up
            signal: the signal to implant; its motifs must not contain gaps (no "/" in the motif seed)
            path: passed on to `Repertoire.build_from_sequence_objects` together with the new sequences

        Returns:
            the object returned by `Repertoire.build_from_sequence_objects`, built from the new sequences and a deep copy of
            the original metadata in which `signal.id` is set to True

        Raises:
            AssertionError: if any motif seed contains a gap, or if the number of sequences to implant is not positive
        """
        assert all("/" not in motif.seed for motif in signal.motifs), \
            f'FullSequenceImplanting: motifs cannot include gaps. Check motifs {[motif.identifier for motif in signal.motifs]}.'

        sequences = repertoire.sequences
        new_sequence_count = math.ceil(len(sequences) * repertoire_implanting_rate)
        assert new_sequence_count > 0, \
            f"FullSequenceImplanting: there are too few sequences ({len(sequences)}) in the repertoire with identifier {repertoire.identifier} " \
            f"to have the given repertoire implanting rate ({repertoire_implanting_rate}). Please consider increasing the repertoire implanting rate."

        new_sequences = self._create_new_sequences(sequences, new_sequence_count, signal)

        # deep copy so that the metadata of the input repertoire is not modified
        metadata = copy.deepcopy(repertoire.metadata)
        metadata[signal.id] = True

        return Repertoire.build_from_sequence_objects(new_sequences, path, metadata)

    def _create_new_sequences(self, sequences, new_sequence_count, signal) -> List[ReceptorSequence]:
        """
        Keep the leading sequences and append `new_sequence_count` sequences instantiated from randomly chosen motifs.

        Args:
            sequences: the original sequences of the repertoire
            new_sequence_count: number of motif-derived sequences to append in place of the trailing original sequences
            signal: the signal whose motifs are sampled (uniformly, via `random.choice`) for each new sequence

        Returns:
            a new list with the retained original sequences followed by the generated sequences
        """
        # The prefix length is computed explicitly instead of slicing with `[:-new_sequence_count]`: a count of 0 would
        # make that slice `[:0]` and drop every original sequence (the caller's guard is an assert, which `-O` removes).
        kept_count = max(len(sequences) - new_sequence_count, 0)
        # list(...) guarantees a fresh, appendable list regardless of the concrete sequence container
        new_sequences = list(sequences[:kept_count])

        for _ in range(new_sequence_count):
            motif = random.choice(signal.motifs)
            motif_instance = motif.instantiate_motif()

            # the motif instance is the whole sequence, so the implant is annotated at position 0
            annotation = SequenceAnnotation([ImplantAnnotation(signal_id=signal.id, motif_id=motif.identifier,
                                                               motif_instance=motif_instance.instance, position=0)])
            # genes, count and chain of the generated sequences are fixed values
            metadata = SequenceMetadata(v_gene="TRBV6-1", j_gene="TRBJ2-7", count=1, chain="B")

            new_sequences.append(ReceptorSequence(amino_acid_sequence=motif_instance.instance, annotation=annotation, metadata=metadata))

        return new_sequences

    def implant_in_receptor(self, receptor, signal, is_noise: bool):
        """
        Not supported by this strategy: always raises.

        Raises:
            RuntimeError: on every call
        """
        raise RuntimeError("FullSequenceImplanting was called on a receptor object. Check the simulation parameters.")