Source code for immuneML.api.galaxy.build_ligo_yaml

import argparse
import sys
from pathlib import Path

from immuneML.data_model.bnp_util import write_yaml
from immuneML.util.PathBuilder import PathBuilder


def parse_command_line_arguments(args):
    """Parse the command line options of the Galaxy LIgO specification builder.

    Args:
        args: list of command line tokens (without the program name), e.g. ``sys.argv[1:]``.

    Returns:
        The ``argparse.Namespace`` holding one attribute per option: ``motif_seed``,
        ``example_with_motif_count``, ``example_without_motif_count``, ``simulation_strategy``,
        ``dataset_type``, ``chain_type``, ``signal_percentage``, ``repertoire_size``,
        ``output_path`` and ``file_name``.

    Note:
        Missing required options or values outside the declared ``choices`` are handled by
        argparse itself (error message and ``SystemExit``).
    """
    cli = argparse.ArgumentParser(description="Tool for building specification for simulating data with LiGO in Galaxy.")

    # (flags, add_argument keyword settings), registered in this order so the generated
    # help text lists the options exactly as declared here.
    option_table = (
        (("-m", "--motif_seed"),
         {"required": True, "help": "Seed of the motif to implant."}),
        (("-e", "--example_with_motif_count"),
         {"required": True, "type": int, "help": "Number of examples to generate containing the motif."}),
        (("-w", "--example_without_motif_count"),
         {"required": True, "type": int, "help": "Number of examples to generate NOT containing the motif."}),
        (("-s", "--simulation_strategy"),
         {"choices": ["RejectionSampling", "Implanting"], "required": True,
          "help": "Strategy for simulating signals into sequences."}),
        (("-d", "--dataset_type"),
         {"choices": ["repertoire", "sequence"], "required": True,
          "help": "Type of dataset to generate (RepertoireDataset or SequenceDataset, ReceptorDataset is currently not available)"}),
        (("-c", "--chain_type"),
         {"choices": ["humanTRA", "humanTRB", "humanIGH", "humanIGK", "humanIGL"], "required": True,
          "help": "Chain type for the simulated dataset, to be used for default OLGA model selection."}),
        (("-p", "--signal_percentage"),
         {"type": float, "default": 100, "help": "Percentage of sequences that contain the signal."}),
        (("-r", "--repertoire_size"),
         {"type": int, "required": False,
          "help": "Number of sequences per repertoire, if dataset_type is repertoire."}),
        (("-o", "--output_path"),
         {"required": True, "help": "Output location for the generated yaml file (directory)."}),
        (("-f", "--file_name"),
         {"default": "specs.yaml",
          "help": "Output file name for the yaml file. Default name is 'specs.yaml' if not specified."}),
    )

    for flags, settings in option_table:
        cli.add_argument(*flags, **settings)

    return cli.parse_args(args)
def _build_sim_item(parsed_args, number_of_examples, signals, seed):
    """Build one entry of ``sim_items`` for the simulation ``sim1``.

    A fresh ``generative_model`` dict is created on every call so that the items never
    share nested objects.
    """
    is_repertoire = parsed_args.dataset_type == "repertoire"
    return {
        "generative_model": {
            "default_model_name": parsed_args.chain_type,
            "type": "OLGA",
        },
        "number_of_examples": number_of_examples,
        "signals": signals,
        "seed": seed,
        # the repertoire size is only passed on for repertoire datasets
        "receptors_in_repertoire_count": parsed_args.repertoire_size if is_repertoire else None,
    }


def build_specs(parsed_args):
    """Assemble the specification dictionary from the parsed command line options.

    The result has two top-level sections:

    - ``definitions``: one motif (``motif1``) seeded with ``parsed_args.motif_seed``, one
      signal (``signal1``) using that motif, and one simulation (``sim1``) with two
      simulation items: ``signal`` (``signal1`` at the fraction ``signal_percentage / 100``,
      seed 100) and ``no_signal`` (no signals, seed 200);
    - ``instructions``: a single ``LigoSim`` instruction named ``simulate_with_ligo`` that
      refers to ``sim1``, with 8 processes and ``export_p_gens`` disabled.

    Args:
        parsed_args: object exposing the attributes ``motif_seed``, ``chain_type``,
            ``example_with_motif_count``, ``example_without_motif_count``,
            ``signal_percentage``, ``repertoire_size``, ``dataset_type`` and
            ``simulation_strategy`` (as produced by ``parse_command_line_arguments``).

    Returns:
        dict: the nested specification, ready to be serialized to YAML.
    """
    sim_items = {
        "signal": _build_sim_item(
            parsed_args,
            number_of_examples=parsed_args.example_with_motif_count,
            # the percentage from the command line is stored as a fraction
            signals={"signal1": parsed_args.signal_percentage / 100},
            seed=100,
        ),
        "no_signal": _build_sim_item(
            parsed_args,
            number_of_examples=parsed_args.example_without_motif_count,
            signals={},
            seed=200,
        ),
    }

    return {
        "definitions": {
            "motifs": {
                "motif1": {"seed": parsed_args.motif_seed},
            },
            "signals": {
                "signal1": {"motifs": ["motif1"]},
            },
            "simulations": {
                "sim1": {
                    "sim_items": sim_items,
                    "is_repertoire": parsed_args.dataset_type == "repertoire",
                    "sequence_type": "amino_acid",
                    "simulation_strategy": parsed_args.simulation_strategy,
                },
            },
        },
        "instructions": {
            "simulate_with_ligo": {
                "type": "LigoSim",
                "simulation": "sim1",
                "number_of_processes": 8,
                "export_p_gens": False,
            },
        },
    }
def main(args):
    """Build the specification from command line tokens and write it to a YAML file.

    Args:
        args: list of command line tokens (without the program name).

    Returns:
        str: path of the written file, i.e. ``<output_path>/<file_name>``.
    """
    options = parse_command_line_arguments(args)
    yaml_content = build_specs(options)

    # PathBuilder.build is a project helper; presumably it creates the output
    # directory if it does not exist yet -- confirm against its implementation.
    PathBuilder.build(options.output_path)

    target_file = Path(options.output_path) / options.file_name
    write_yaml(target_file, yaml_content)

    return str(target_file)
if __name__ == "__main__":
    # Script entry point: forward the command line tokens, without the program name, to main().
    main(sys.argv[1:])