# Source code for immuneML.dsl.instruction_parsers.SimulationParser

from pathlib import Path

from immuneML.IO.dataset_export.DataExporter import DataExporter
from immuneML.dsl.symbol_table.SymbolTable import SymbolTable
from immuneML.dsl.symbol_table.SymbolType import SymbolType
from immuneML.util.ParameterValidator import ParameterValidator
from immuneML.util.ReflectionHandler import ReflectionHandler
from immuneML.workflows.instructions.SimulationInstruction import SimulationInstruction



# [docs]
class SimulationParser:
    """
    Parses the specification of a Simulation instruction and builds the corresponding SimulationInstruction object.

    The instruction refers by name to a dataset and a simulation that were defined under `definitions`; both are
    looked up in the symbol table. All signals registered in the symbol table are passed to the instruction.
    `export_formats` is either null (no exporters are created) or a list of format names, each of which is resolved
    to the class named `<format>Exporter`.

    YAML specification:

    .. highlight:: yaml
    .. code-block:: yaml

        definitions:
            dataset:
                my_dataset:
                    ...

            motifs:
                m1:
                    seed: AAC # "/" character denotes the gap in the seed if present (e.g. AA/C)
                    instantiation:
                        GappedKmer:
                            # probability that when hamming distance is allowed a letter in the seed will be replaced by
                            # other alphabet letters - alphabet_weights
                            alphabet_weights:
                                A: 0.2
                                C: 0.2
                                D: 0.4
                                E: 0.2
                            # Relative probabilities of choosing each position in the seed for hamming distance modification.
                            # The probabilities will be scaled to sum to one - position_weights
                            position_weights:
                                0: 1
                                1: 0
                                2: 0
                            hamming_distance_probabilities:
                                0: 0.5 # Hamming distance of 0 (no change) with probability 0.5
                                1: 0.5 # Hamming distance of 1 (one letter change) with probability 0.5
                            min_gap: 0
                            max_gap: 1
            signals:
                s1:
                    motifs: # list of all motifs for signal which will be uniformly sampled to get a motif instance for implanting
                        - m1
                    sequence_position_weights: # likelihood of implanting at IMGT position of receptor sequence
                        107: 0.5
                    implanting: HealthySequence # choose only sequences with no other signals to implant one of the motifs
            simulations:
                sim1: # one Simulation object consists of a dict of Implanting objects
                    i1:
                        dataset_implanting_rate: 0.5 # percentage of repertoires where the signals will be implanted
                        repertoire_implanting_rate: 0.01 # percentage of sequences within repertoire where the signals will be implanted
                        signals:
                            - s1

        instructions:
            my_simulation_instruction:
                type: Simulation
                dataset: my_dataset
                simulation: sim1
                export_formats: [AIRR, ImmuneML]

    """

    def parse(self, key: str, instruction: dict, symbol_table: SymbolTable, path: Path = None) -> SimulationInstruction:
        """
        Build a SimulationInstruction from the parsed YAML specification of one instruction.

        Args:
            key: name of the instruction in the specification; used in the key validation call and as the
                name of the resulting instruction.
            instruction: the instruction's parameters; its keys are validated against
                "dataset", "simulation", "type" and "export_formats".
            symbol_table: symbol table holding the previously parsed definitions (signals, simulation, dataset).
            path: not used by this parser.

        Returns:
            the SimulationInstruction built from the looked-up signals, simulation, dataset and exporters.
        """
        ParameterValidator.assert_keys(instruction.keys(), ["dataset", "simulation", "type", "export_formats"], "SimulationParser", key)

        # every signal registered in the symbol table is handed over, not only those listed in the chosen simulation
        signals = [symbol.item for symbol in symbol_table.get_by_type(SymbolType.SIGNAL)]
        simulation = symbol_table.get(instruction["simulation"])
        dataset = symbol_table.get(instruction["dataset"])
        exporters = self.parse_exporters(instruction)

        return SimulationInstruction(signals=signals, simulation=simulation, dataset=dataset, name=key, exporters=exporters)

    def parse_exporters(self, instruction):
        """
        Resolve the names listed under "export_formats" to exporter classes.

        Args:
            instruction: the instruction's parameters; must contain the key "export_formats".

        Returns:
            None if "export_formats" is None; otherwise a list with the class named `<format>Exporter`
            for every listed format, in the order given.
        """
        export_formats = instruction["export_formats"]
        if export_formats is None:
            return None

        class_path = "dataset_export/"
        valid_formats = ReflectionHandler.all_nonabstract_subclass_basic_names(DataExporter, 'Exporter', class_path)
        ParameterValidator.assert_all_in_valid_list(export_formats, valid_formats,
                                                    location="SimulationParser", parameter_name="export_formats")

        return [ReflectionHandler.get_class_by_name(f"{item}Exporter", class_path) for item in export_formats]