Source code for immuneML.api.galaxy.DatasetGenerationOverviewTool
from pathlib import Path
import yaml
from immuneML.api.galaxy.GalaxyTool import GalaxyTool
from immuneML.api.galaxy.Util import Util
from immuneML.app.ImmuneMLApp import ImmuneMLApp
from immuneML.util.Logger import print_log
from immuneML.util.ParameterValidator import ParameterValidator
from immuneML.util.PathBuilder import PathBuilder
from immuneML.workflows.instructions.exploratory_analysis.ExploratoryAnalysisInstruction import \
ExploratoryAnalysisInstruction
[docs]
class DatasetGenerationOverviewTool(GalaxyTool):
"""
DatasetGenerationOverviewTool is an alternative to running ImmuneMLApp directly.
This tool is meant to be used as an endpoint for Galaxy tool that will create a Galaxy collection out of a dataset in immuneML format.
This tool accepts a path to a YAML specification which uses a single dataset, and runs the ExploratoryAnalysisInstruction with optional reports.
The created dataset will be located in the supplied output directory, under the 'galaxy_dataset' folder.
The main dataset file will have the name of the dataset given in the specification and has an extension .yaml.
"""
def __init__(self, specification_path: Path, result_path: Path, **kwargs):
Util.check_parameters(specification_path, result_path, kwargs, "Dataset generation tool")
super().__init__(specification_path, result_path, **kwargs)
def _run(self):
PathBuilder.build(self.result_path)
self._update_specs()
state = ImmuneMLApp(self.yaml_path, self.result_path).run()[0]
dataset = list(state.exploratory_analysis_units.values())[0].dataset
Util.export_galaxy_dataset(dataset, self.result_path)
print_log(f"Exported dataset.")
def _update_specs(self):
with self.yaml_path.open('r') as file:
specs = yaml.safe_load(file)
ParameterValidator.assert_keys_present(specs.keys(), ["definitions", "instructions"], DatasetGenerationOverviewTool.__name__, "YAML specification")
ParameterValidator.assert_all_in_valid_list(specs.keys(), ["definitions", "instructions", "output"], DatasetGenerationOverviewTool.__name__, "YAML specification")
self._check_dataset(specs)
self._check_instruction(specs)
Util.update_dataset_key(specs, DatasetGenerationOverviewTool.__name__)
Util.check_paths(specs, DatasetGenerationOverviewTool.__name__)
Util.update_result_paths(specs, self.result_path, self.yaml_path)
def _check_dataset(self, specs):
ParameterValidator.assert_keys_present(specs["definitions"].keys(), ['datasets'], DatasetGenerationOverviewTool.__name__, 'definitions')
assert len(specs['definitions']['datasets'].keys()) == 1, \
f"{DatasetGenerationOverviewTool.__name__}: only one dataset can be defined with this Galaxy tool, got these " \
f"instead: {list(specs['definitions']['datasets'].keys())}."
def _check_instruction(self, specs):
assert len(specs['instructions'].keys()) == 1, \
f"{DatasetGenerationOverviewTool.__name__}: only one instruction of type ExploratoryAnalysis can be defined with this Galaxy tool, got these " \
f"instructions instead: {list(specs['instructions'].keys())}."
instruction_name = Util.check_instruction_type(specs, DatasetGenerationOverviewTool.__name__, ExploratoryAnalysisInstruction.__name__[:-11])
dataset_name = None
for analysis_key, analysis_specs in specs['instructions'][instruction_name]["analyses"].items():
ParameterValidator.assert_keys_present(analysis_specs.keys(), ["dataset", "report"],
DatasetGenerationOverviewTool.__name__, f"{instruction_name}/analyses/{analysis_key}")
if dataset_name is None:
dataset_name = analysis_specs["dataset"]
else:
assert analysis_specs["dataset"] == dataset_name, f"{DatasetGenerationOverviewTool.__name__}: expected only one dataset name. Found: {dataset_name} and {analysis_specs['dataset']}"