# Source code for immuneML.dsl.instruction_parsers.SimulationParser

from pathlib import Path

from immuneML.IO.dataset_export.DataExporter import DataExporter
from immuneML.dsl.symbol_table.SymbolTable import SymbolTable
from immuneML.dsl.symbol_table.SymbolType import SymbolType
from immuneML.util.ParameterValidator import ParameterValidator
from immuneML.util.ReflectionHandler import ReflectionHandler
from immuneML.workflows.instructions.SimulationInstruction import SimulationInstruction


class SimulationParser:
    """
    Parses a ``Simulation`` instruction from the specification into a SimulationInstruction object.

    YAML specification:

    .. highlight:: yaml
    .. code-block:: yaml

        definitions:
            dataset:
                my_dataset:
                    ...

            motifs:
                m1:
                    seed: AAC # "/" character denotes the gap in the seed if present (e.g. AA/C)
                    instantiation:
                        GappedKmer:
                            # probability that when hamming distance is allowed a letter in the seed will be replaced by
                            # other alphabet letters - alphabet_weights
                            alphabet_weights:
                                A: 0.2
                                C: 0.2
                                D: 0.4
                                E: 0.2
                            # Relative probabilities of choosing each position in the seed for hamming distance modification.
                            # The probabilities will be scaled to sum to one - position_weights
                            position_weights:
                                0: 1
                                1: 0
                                2: 0
                            hamming_distance_probabilities:
                                0: 0.5 # Hamming distance of 0 (no change) with probability 0.5
                                1: 0.5 # Hamming distance of 1 (one letter change) with probability 0.5
                            min_gap: 0
                            max_gap: 1

            signals:
                s1:
                    motifs: # list of all motifs for signal which will be uniformly sampled to get a motif instance for implanting
                        - m1
                    sequence_position_weights: # likelihood of implanting at IMGT position of receptor sequence
                        107: 0.5
                    implanting: HealthySequence # choose only sequences with no other signals to implant one of the motifs

            simulations:
                sim1: # one Simulation object consists of a dict of Implanting objects
                    i1:
                        dataset_implanting_rate: 0.5 # percentage of repertoire where the signals will be implanted
                        repertoire_implanting_rate: 0.01 # percentage of sequences within repertoire where the signals will be implanted
                        signals:
                            - s1

        instructions:
            my_simulation_instruction:
                type: Simulation
                dataset: my_dataset
                simulation: sim1
                export_formats: [AIRR, ImmuneML]

    """

    def parse(self, key: str, instruction: dict, symbol_table: SymbolTable, path: Path = None) -> SimulationInstruction:
        """
        Build a SimulationInstruction from one entry of the ``instructions`` section.

        Args:
            key: name of the instruction in the specification; passed on as the instruction's ``name``.
            instruction: the instruction's parameters; its keys are checked against
                ``dataset``, ``simulation``, ``type`` and ``export_formats``.
            symbol_table: table holding the already parsed definitions; the dataset and the
                simulation are looked up by the names given in ``instruction``, and every
                symbol of type ``SymbolType.SIGNAL`` is passed to the instruction.
            path: not used by this parser.

        Returns:
            the SimulationInstruction assembled from the resolved signals, simulation, dataset and exporters.
        """
        # Key validation is delegated to ParameterValidator; how a violation is reported is defined there.
        ParameterValidator.assert_keys(instruction.keys(), ["dataset", "simulation", "type", "export_formats"],
                                       "SimulationParser", key)

        # All signals registered in the symbol table are forwarded, not only those named by the simulation.
        signals = [signal.item for signal in symbol_table.get_by_type(SymbolType.SIGNAL)]
        simulation = symbol_table.get(instruction["simulation"])
        dataset = symbol_table.get(instruction["dataset"])
        exporters = self.parse_exporters(instruction)

        return SimulationInstruction(signals=signals, simulation=simulation, dataset=dataset, name=key,
                                     exporters=exporters)

    def parse_exporters(self, instruction):
        """
        Resolve the ``export_formats`` entry of an instruction.

        Args:
            instruction: the instruction's parameters; must contain the key ``export_formats``.

        Returns:
            None if ``export_formats`` is None; otherwise a list with one item per requested format,
            each obtained from ``ReflectionHandler.get_class_by_name`` for ``<format>Exporter``.
        """
        export_formats = instruction["export_formats"]
        if export_formats is None:
            return None

        class_path = "dataset_export/"
        # Valid format names are derived from the DataExporter subclasses found by ReflectionHandler.
        valid_formats = ReflectionHandler.all_nonabstract_subclass_basic_names(DataExporter, 'Exporter', class_path)
        ParameterValidator.assert_all_in_valid_list(export_formats, valid_formats, location="SimulationParser",
                                                    parameter_name="export_formats")

        return [ReflectionHandler.get_class_by_name(f"{item}Exporter", class_path) for item in export_formats]