Source code for immuneML.api.galaxy.build_ligo_yaml

import argparse
import sys
from pathlib import Path

from immuneML.data_model.bnp_util import write_yaml
from immuneML.util.PathBuilder import PathBuilder



[docs]
def parse_command_line_arguments(args):
    parser = argparse.ArgumentParser(description="Tool for building specification for simulating data with LiGO in Galaxy.")

    parser.add_argument("-m", "--motif_seed", required=True,
                        help="Seed of the motif to implant.")
    parser.add_argument("-e", "--example_with_motif_count", required=True, type=int,
                        help="Number of examples to generate containing the motif.")
    parser.add_argument("-w", "--example_without_motif_count", required=True, type=int,
                        help="Number of examples to generate NOT containing the motif.")
    parser.add_argument("-s", "--simulation_strategy",  choices=["RejectionSampling", "Implanting"], required=True,
                        help="Strategy for simulating signals into sequences.")
    parser.add_argument("-d", "--dataset_type",  choices=["repertoire", "sequence"], required=True,
                        help="Type of dataset to generate (RepertoireDataset or SequenceDataset, ReceptorDataset is currently not available)")
    parser.add_argument("-c", "--chain_type",  choices=["humanTRA", "humanTRB", "humanIGH", "humanIGK", "humanIGL"], required=True,
                        help="Chain type for the simulated dataset, to be used for default OLGA model selection.")
    parser.add_argument("-p", "--signal_percentage",  type=float, default=100,
                        help="Percentage of sequences that contain the signal.")
    parser.add_argument("-r", "--repertoire_size",  type=int, required=False,
                        help="Number of sequences per repertoire, if dataset_type is repertoire.")

    parser.add_argument("-o", "--output_path", required=True,
                        help="Output location for the generated yaml file (directory).")
    parser.add_argument("-f", "--file_name", default="specs.yaml",
                        help="Output file name for the yaml file. Default name is 'specs.yaml' if not specified.")

    return parser.parse_args(args)




[docs]
def build_specs(parsed_args):

    specs = {
        "definitions": {
            "motifs": {
                "motif1": {
                    "seed": parsed_args.motif_seed
                }
            },
            "signals": {
                "signal1": {
                    "motifs": ["motif1"]
                }
            },
            "simulations": {
                "sim1": {
                    "sim_items": {
                        "signal": {
                            "generative_model": {
                                "default_model_name": parsed_args.chain_type,
                                "type": "OLGA"
                            },
                            "number_of_examples": parsed_args.example_with_motif_count,
                            "signals": {"signal1": parsed_args.signal_percentage / 100},
                            "seed": 100,
                            "receptors_in_repertoire_count": parsed_args.repertoire_size if parsed_args.dataset_type == "repertoire" else None
                        },
                        "no_signal": {
                            "generative_model": {
                                "default_model_name": parsed_args.chain_type,
                                "type": "OLGA"
                            },
                            "number_of_examples": parsed_args.example_without_motif_count,
                            "signals": {},
                            "seed": 200,
                            "receptors_in_repertoire_count": parsed_args.repertoire_size if parsed_args.dataset_type == "repertoire" else None
                        }
                    },
                    "is_repertoire": True if parsed_args.dataset_type == "repertoire" else False,
                    "sequence_type": "amino_acid",
                    "simulation_strategy": parsed_args.simulation_strategy
                }
            }
        },
        "instructions": {
            f"simulate_with_ligo": {
                "type": "LigoSim",
                "simulation": "sim1",
                "number_of_processes": 8,
                "export_p_gens": False,
            }
        }
    }

    return specs



[docs]
def main(args):
    parsed_args = parse_command_line_arguments(args)
    specs = build_specs(parsed_args)

    PathBuilder.build(parsed_args.output_path)
    output_location = Path(parsed_args.output_path) / parsed_args.file_name

    write_yaml(output_location, specs)

    return str(output_location)



if __name__ == "__main__":
    main(sys.argv[1:])