Source code for immuneML.api.galaxy.GalaxySimulationTool
import logging
from pathlib import Path
import yaml
from immuneML.IO.dataset_export.AIRRExporter import AIRRExporter
from immuneML.api.galaxy.GalaxyTool import GalaxyTool
from immuneML.api.galaxy.Util import Util
from immuneML.app.ImmuneMLApp import ImmuneMLApp
from immuneML.util.PathBuilder import PathBuilder
[docs]
class GalaxySimulationTool(GalaxyTool):
    """
    Galaxy endpoint that runs an immuneML simulation described by a YAML specification.

    GalaxySimulationTool is an alternative to running immuneML with the simulation instruction directly. It accepts
    a YAML specification file and a path to the output directory. It implants the signals in the dataset that was
    provided either as an existing dataset with a set of files or in the random dataset as described in the
    specification file.

    This tool is meant to be used as an endpoint for a Galaxy tool that will create a Galaxy collection out of a
    dataset in immuneML format that can be readily used by other immuneML-based Galaxy tools.

    The specification supplied for this tool is identical to the immuneML specification, except that it can include
    only one instruction, which has to be of type 'LigoSim':

    .. code-block:: yaml

        definitions:
            motifs:
                motif1:
                    seed: AA
                motif2:
                    seed: GG
            signals:
                signal1:
                    motifs: [motif1]
                signal2:
                    motifs: [motif2]
            simulations:
                sim1:
                    is_repertoire: true
                    paired: false
                    sequence_type: amino_acid
                    simulation_strategy: Implanting
                    remove_seqs_with_signals: true # remove signal-specific AIRs from the background
                    sim_items:
                        sim_item: # group of AIRs with the same parameters
                            AIRR1:
                                immune_events:
                                    ievent1: True
                                    ievent2: False
                                signals: [signal1: 0.3, signal2: 0.3]
                                number_of_examples: 10
                                is_noise: False
                                receptors_in_repertoire_count: 6
                                generative_model:
                                    chain: heavy
                                    default_model_name: humanIGH
                                    model_path: null
                                    type: OLGA
                            AIRR2:
                                immune_events:
                                    ievent1: False
                                    ievent2: True
                                signals: [signal1: 0.5, signal2: 0.5]
                                number_of_examples: 10
                                is_noise: False
                                receptors_in_repertoire_count: 6
                                generative_model:
                                    chain: heavy
                                    default_model_name: humanIGH
                                    model_path: null
                                    type: OLGA
        instructions:
            my_sim_inst:
                export_p_gens: false
                max_iterations: 100
                number_of_processes: 4
                sequence_batch_size: 1000
                simulation: sim1
                type: LigoSim

    """

    def __init__(self, specification_path: Path, result_path: Path, **kwargs):
        """
        Check the supplied arguments and initialise the underlying GalaxyTool.

        Args:
            specification_path: path to the YAML specification file.
            result_path: path to the output directory.
            **kwargs: extra keyword arguments; handed to Util.check_parameters and to GalaxyTool.__init__.
        """
        tool_name = GalaxySimulationTool.__name__
        Util.check_parameters(specification_path, result_path, kwargs, tool_name)
        super().__init__(specification_path, result_path, **kwargs)

    def _run(self):
        """
        Run the simulation and export the resulting dataset for Galaxy.

        The result directory is prepared first, the specification is loaded and checked to contain a 'LigoSim'
        instruction, and the application is then run from the YAML file at self.yaml_path. The dataset attached to
        the first state returned by the application is exported to the result path.
        """
        PathBuilder.build(self.result_path)

        specification = self._prepare_specs()
        Util.check_instruction_type(specification, 'GalaxySimulationTool', "LigoSim")

        # the app is run from the YAML file on disk, not from the in-memory specification
        instruction_states = ImmuneMLApp(self.yaml_path, self.result_path).run()
        simulated_dataset = instruction_states[0].resulting_dataset
        Util.export_galaxy_dataset(simulated_dataset, self.result_path)

        logging.info(f"{GalaxySimulationTool.__name__}: the simulation is finished.")

    def _prepare_specs(self) -> dict:
        """
        Load the YAML specification and pass it through the Galaxy path utilities.

        Returns:
            the specification parsed from self.yaml_path, after Util.check_paths and Util.update_result_paths
            have been applied to it.
        """
        with self.yaml_path.open("r") as spec_file:
            specification = yaml.safe_load(spec_file)

        Util.check_paths(specification, "GalaxySimulationTool")
        # NOTE(review): update_result_paths also receives the YAML path, so it presumably persists the
        # updated specification for the later app run - confirm in Util.
        Util.update_result_paths(specification, self.result_path, self.yaml_path)

        return specification