Source code for immuneML.api.galaxy.GalaxySimulationTool

import logging
from pathlib import Path

import yaml

from immuneML.IO.dataset_export.AIRRExporter import AIRRExporter
from immuneML.api.galaxy.GalaxyTool import GalaxyTool
from immuneML.api.galaxy.Util import Util
from immuneML.app.ImmuneMLApp import ImmuneMLApp
from immuneML.util.PathBuilder import PathBuilder


class GalaxySimulationTool(GalaxyTool):
    """
    GalaxySimulationTool is an alternative to running immuneML with the simulation instruction directly.

    It accepts a YAML specification file and a path to the output directory. It implants the signals in the
    dataset that was provided either as an existing dataset with a set of files or in the random dataset as
    described in the specification file.

    This tool is meant to be used as an endpoint for a Galaxy tool that will create a Galaxy collection out of
    a dataset in immuneML format that can be readily used by other immuneML-based Galaxy tools.

    The specification supplied for this tool is identical to the immuneML specification, except that it can
    include only one instruction, which has to be of type 'LigoSim':

    .. code-block:: yaml

        definitions:
            motifs:
                motif1:
                    seed: AA
                motif2:
                    seed: GG
            signals:
                signal1:
                    motifs: [motif1]
                signal2:
                    motifs: [motif2]
            simulations:
                sim1:
                    is_repertoire: true
                    paired: false
                    sequence_type: amino_acid
                    simulation_strategy: Implanting
                    remove_seqs_with_signals: true # remove signal-specific AIRs from the background
                    sim_items:
                        sim_item: # group of AIRs with the same parameters
                            AIRR1:
                                immune_events:
                                    ievent1: True
                                    ievent1: False
                                signals: [signal1: 0.3, signal2: 0.3]
                                number_of_examples: 10
                                is_noise: False
                                receptors_in_repertoire_count: 6
                                generative_model:
                                    chain: heavy
                                    default_model_name: humanIGH
                                    model_path: null
                                    type: OLGA
                            AIRR2:
                                immune_events:
                                    ievent1: False
                                    ievent1: True
                                signals: [signal1: 0.5, signal2: 0.5]
                                number_of_examples: 10
                                is_noise: False
                                receptors_in_repertoire_count: 6
                                generative_model:
                                    chain: heavy
                                    default_model_name: humanIGH
                                    model_path: null
                                    type: OLGA
        instructions:
            my_sim_inst:
                export_p_gens: false
                max_iterations: 100
                number_of_processes: 4
                sequence_batch_size: 1000
                simulation: sim1
                type: LigoSim

    """

    # NOTE(review): the YAML example above repeats the `ievent1` key inside each `immune_events`
    # mapping; presumably two distinct event names were intended -- confirm against the simulation
    # documentation before relying on the example. The nesting of the example was reconstructed
    # and should be checked as well.

    def __init__(self, specification_path: Path, result_path: Path, **kwargs):
        """
        Check the supplied parameters with Util.check_parameters, then hand them to the GalaxyTool initializer.

        Args:
            specification_path: path to the YAML specification file
            result_path: path to the output directory
            **kwargs: extra arguments, passed to the parameter check and forwarded to GalaxyTool
        """
        tool_name = GalaxySimulationTool.__name__
        Util.check_parameters(specification_path, result_path, kwargs, tool_name)
        super().__init__(specification_path, result_path, **kwargs)

    def _run(self):
        """
        Run the simulation described in the specification and export the resulting dataset.

        The specification is loaded and checked against the 'LigoSim' instruction type before ImmuneMLApp is
        started; the dataset of the first state returned by the app is exported to the result path.
        """
        PathBuilder.build(self.result_path)

        # the specification has to pass the instruction type check before the app is created
        Util.check_instruction_type(self._prepare_specs(), 'GalaxySimulationTool', "LigoSim")

        # only the first state returned by the app run is used
        first_state = ImmuneMLApp(self.yaml_path, self.result_path).run()[0]
        Util.export_galaxy_dataset(first_state.resulting_dataset, self.result_path)

        logging.info(f"{GalaxySimulationTool.__name__}: the simulation is finished.")

    def _prepare_specs(self) -> dict:
        """
        Load the YAML specification and pass it through the Util path checks.

        Returns:
            the specification parsed with yaml.safe_load, after Util.check_paths and
            Util.update_result_paths have been called on it
        """
        with self.yaml_path.open(mode="r") as yaml_file:
            loaded_specs = yaml.safe_load(yaml_file)

        Util.check_paths(loaded_specs, "GalaxySimulationTool")
        Util.update_result_paths(loaded_specs, self.result_path, self.yaml_path)
        return loaded_specs