Source code for immuneML.api.galaxy.GalaxySimulationTool
import logging
import shutil
from pathlib import Path
import yaml
from immuneML.api.galaxy.GalaxyTool import GalaxyTool
from immuneML.api.galaxy.Util import Util
from immuneML.workflows.instructions.SimulationInstruction import SimulationInstruction
[docs]
class GalaxySimulationTool(GalaxyTool):
    """
    GalaxySimulationTool is an alternative to running immuneML with the simulation instruction directly. It accepts a YAML specification file and a
    path to the output directory. It implants the signals in the dataset that was provided either as an existing dataset with a set of files or in
    the random dataset as described in the specification file.

    This tool is meant to be used as an endpoint for a Galaxy tool that will create a Galaxy collection out of a dataset in immuneML format that can
    be readily used by other immuneML-based Galaxy tools.

    The specification supplied for this tool is identical to immuneML specification, except that it can include only one instruction which has to
    be of type 'Simulation':

    .. code-block:: yaml

        definitions:
            datasets:
                my_synthetic_dataset:
                    format: RandomRepertoireDataset
                    params:
                        repertoire_count: 100
                        labels: {}
            motifs:
                my_simple_motif: # a simple motif without gaps or hamming distance
                    seed: AAA
                    instantiation: GappedKmer
                my_complex_motif: # complex motif containing a gap + hamming distance
                    seed: AA/A # '/' denotes gap position if present, if not, there's no gap
                    instantiation:
                        GappedKmer:
                            min_gap: 1
                            max_gap: 2
                            hamming_distance_probabilities: # probabilities for each number of
                                0: 0.7 # modification to the seed
                                1: 0.3
                            position_weights: # probabilities for modification per position
                                0: 1
                                1: 0 # note that index 2, the position of the gap,
                                3: 0 # is excluded from position_weights
                            alphabet_weights: # probabilities for using each amino acid in
                                A: 0.2 # a hamming distance modification
                                C: 0.2
                                D: 0.4
                                E: 0.2
            signals:
                my_signal:
                    motifs:
                        - my_simple_motif
                        - my_complex_motif
                    implanting: HealthySequence
                    sequence_position_weights:
                        109: 1
                        110: 2
                        111: 5
                        112: 1
            simulations:
                my_simulation:
                    my_implanting:
                        signals:
                            - my_signal
                        dataset_implanting_rate: 0.5
                        repertoire_implanting_rate: 0.25
        instructions:
            my_simulation_instruction: # user-defined name of the instruction
                type: Simulation # which instruction to execute
                dataset: my_synthetic_dataset # which dataset to use for implanting the signals
                simulation: my_simulation # how to implant the signals - definition of the simulation
                number_of_processes: 4 # how many parallel processes to use during execution
                export_formats: [AIRR] # in which formats to export the dataset, ImmuneML format will be added automatically
        output: # the output format
            format: HTML

    """

    def __init__(self, specification_path: Path, result_path: Path, **kwargs):
        """
        Validate the arguments and delegate the rest of the construction to GalaxyTool.

        Args:
            specification_path: path to the YAML specification file.
            result_path: path to the output directory.
            **kwargs: additional arguments; passed to Util.check_parameters for validation and forwarded unchanged to GalaxyTool.
        """
        Util.check_parameters(specification_path, result_path, kwargs, GalaxySimulationTool.__name__)
        super().__init__(specification_path, result_path, **kwargs)

    def _run(self):
        """
        Prepare the specification, run the tool and copy the exported dataset to ``<result_path>/result``.

        Raises:
            IndexError: if nothing under the result path matches ``*/exported_dataset/*/`` after the run.
            FileExistsError: raised by shutil.copytree if ``<result_path>/result`` already exists.
        """
        self.prepare_specs()
        Util.run_tool(self.yaml_path, self.result_path)

        exported_datasets = list(self.result_path.glob("*/exported_dataset/*/"))
        if not exported_datasets:
            # Same exception type as the bare `[0]` lookup used to raise, so existing handlers keep working,
            # but with a message that says what is actually missing.
            raise IndexError(f"{GalaxySimulationTool.__name__}: no exported dataset was found under '{self.result_path}' "
                             f"(expected a directory matching '*/exported_dataset/*/').")

        # NOTE(review): if several export formats produce sibling directories here, the first match in glob order
        # is used - confirm that this is always the dataset in immuneML format.
        dataset_location = exported_datasets[0]
        shutil.copytree(dataset_location, self.result_path / 'result/')

        logging.info(f"{GalaxySimulationTool.__name__}: immuneML has finished and the signals were implanted in the dataset.")

    def prepare_specs(self):
        """
        Load the YAML specification from ``self.yaml_path`` and pass it through the Galaxy-specific checks and updates in Util.

        The steps are, in order: check the instruction type, check the export format, update the dataset key, check the paths and
        update the result paths. Presumably the updated specification is written back to ``self.yaml_path`` by
        Util.update_result_paths (it receives that path, and nothing is written here) - TODO confirm.
        """
        tool_name = GalaxySimulationTool.__name__

        with self.yaml_path.open("r") as file:
            specs = yaml.safe_load(file)

        # "SimulationInstruction" without its 11-character "Instruction" suffix, i.e. "Simulation",
        # which is the value used for `type` in the specification.
        instruction_type = SimulationInstruction.__name__[:-11]
        instruction_name = Util.check_instruction_type(specs, tool_name, instruction_type)

        Util.check_export_format(specs, tool_name, instruction_name)
        Util.update_dataset_key(specs, tool_name)
        Util.check_paths(specs, tool_name)
        Util.update_result_paths(specs, self.result_path, self.yaml_path)