import glob
import logging
import os
from pathlib import Path

import pandas as pd
import yaml

from immuneML.IO.dataset_export.AIRRExporter import AIRRExporter
from immuneML.app.ImmuneMLApp import ImmuneMLApp
from immuneML.util.ParameterValidator import ParameterValidator
from immuneML.util.PathBuilder import PathBuilder
class Util:
@staticmethod
def check_parameters(yaml_path: Path, output_dir: Path, kwargs, location):
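        """Validate that yaml_path and output_dir are Path objects and that yaml_path points to an existing YAML file."""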
        assert isinstance(yaml_path, Path), f"{location}: yaml_path is {yaml_path}, expected Path object."
assert isinstance(output_dir, Path), f"{location}: output_dir is {output_dir}, expected Path object pointing to a folder to store the results."
assert yaml_path.is_file(), f"{location}: path to the specification is not correct, got {yaml_path}, expecting path to a YAML file."
@staticmethod
def check_paths(specs: dict, tool_name: str):
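        """Recursively warn about string parameters that contain directory paths: files in Galaxy have to be referenced by filename only, as uploaded to the Galaxy history."""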
for key in specs.keys():
if isinstance(specs[key], str):
if "/" in specs[key] and specs[key] != "./" and any(name_part in key for name_part in ('path', 'file')):
logging.warning(f"{tool_name}: the paths in specification for Galaxy have to consist only of the filenames "
f"as uploaded to Galaxy history beforehand. The problem occurs for the parameter {key}.")
elif isinstance(specs[key], dict):
Util.check_paths(specs[key], tool_name)
@staticmethod
def update_dataset_key(specs: dict, location, new_key="dataset"):
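        """Rename the single dataset defined under definitions/datasets to new_key and update all references to it in the instructions."""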
dataset_keys = list(specs["definitions"]["datasets"].keys())
assert len(dataset_keys) == 1, f"{location}: one dataset has to be defined under definitions/datasets, got {dataset_keys} instead."
orig_key = dataset_keys[0]
        if orig_key != new_key:
specs["definitions"]["datasets"][new_key] = specs["definitions"]["datasets"][orig_key]
specs["definitions"]["datasets"].pop(orig_key)
for instruction_key in specs["instructions"].keys():
if "dataset" in specs["instructions"][instruction_key]:
specs["instructions"][instruction_key]["dataset"] = new_key
if "datasets" in specs["instructions"][instruction_key]:
specs["instructions"][instruction_key]["datasets"] = [new_key]
if "analyses" in specs["instructions"][instruction_key]:
for analysis_key in specs["instructions"][instruction_key]["analyses"].keys():
specs["instructions"][instruction_key]["analyses"][analysis_key]["dataset"] = new_key
logging.info(f"{location}: renamed dataset '{orig_key}' to '{new_key}'.")
@staticmethod
def update_result_paths(specs: dict, result_path: Path, yaml_path: Path):
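        """Set each dataset's result_path under the given result folder and, for imported (non-random)
        datasets, point 'path' at the specification's directory; the updated specs are written back to yaml_path."""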
if 'datasets' in specs['definitions']:
for key, item in specs["definitions"]["datasets"].items():
if isinstance(item, dict) and 'params' in item.keys() and isinstance(item["params"], dict):
item['params']["result_path"] = str(result_path / key)
if item['format'] not in ['RandomRepertoireDataset', 'RandomReceptorDataset', 'RandomSequenceDataset']:
item['params']['path'] = str(yaml_path.parent)
with yaml_path.open("w") as file:
yaml.dump(specs, file)
@staticmethod
def check_instruction_type(specs: dict, tool_name, expected_instruction) -> str:
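        """Assert that the specification contains exactly one instruction of the expected type and return its name."""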
ParameterValidator.assert_keys_present(list(specs.keys()), ['definitions', 'instructions'], tool_name, "YAML specification")
        instruction_names = list(specs['instructions'].keys())
        assert len(instruction_names) == 1, f"{tool_name}: multiple instructions were given ({', '.join(instruction_names)}), " \
                                            f"but only one instruction of type {expected_instruction} should be specified."
        instruction_name = instruction_names[0]
instruction_type = specs['instructions'][instruction_name]['type']
assert instruction_type == expected_instruction, \
f"{tool_name}: instruction type has to be '{expected_instruction}', got {instruction_type} instead."
return instruction_name
@staticmethod
def discover_dataset_path(dataset_name="dataset"):
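        """Return the path of the dataset metadata YAML in the working directory, matched either exactly as '<dataset_name>.yaml' or as a unique '*<dataset_name>*.yaml' glob."""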
if os.path.exists(f"{dataset_name}.yaml"):
dataset_path = f"{dataset_name}.yaml"
else:
discovered = glob.glob(f"*{dataset_name}*.yaml")
if len(discovered) == 1:
dataset_path = discovered[0]
else:
raise FileNotFoundError(f"Unable to locate '{dataset_name}.yaml'")
return dataset_path
@staticmethod
def remove_path_from_filename(file_path):
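        """Strip directory components from file_path, returning only the filename."""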
return str(Path(file_path).name)
@staticmethod
def export_galaxy_dataset(dataset, result_path):
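        """Export the dataset to result_path/galaxy_dataset in AIRR format and rename the exported metadata file to dataset.yaml."""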
try:
PathBuilder.build(result_path / 'galaxy_dataset')
AIRRExporter.export(dataset, result_path / "galaxy_dataset/")
            # AIRRExporter writes the metadata file under the dataset's original name; normalize it to dataset.yaml
            dataset_file = glob.glob(str(result_path / "galaxy_dataset" / "*.yaml"))[0]
            os.rename(dataset_file, result_path / "galaxy_dataset" / "dataset.yaml")
Util.reformat_galaxy_dataset(result_path / "galaxy_dataset/")
        except Exception as e:
            # chain the original exception so the underlying cause stays visible in the traceback
            raise RuntimeError(f"Error when exporting Galaxy dataset: {e}.") from e