import logging
import os
import pathlib
import shutil
import sys
import warnings
from pathlib import Path
from typing import Optional, Union

import yaml

from immuneML.app.ImmuneMLApp import ImmuneMLApp
from immuneML.util.PathBuilder import PathBuilder
[docs]
class Quickstart:
    """Two-step demo run: simulate a synthetic dataset, then train ML models on it.

    Both steps are expressed as YAML specification files that are written to
    disk and executed through ``ImmuneMLApp``. The directory layout created by
    :meth:`run` is::

        <result_path>/log.txt
        <result_path>/synthetic_dataset/simulation_specs.yaml
        <result_path>/synthetic_dataset/result/...
        <result_path>/machine_learning_analysis/specs.yaml
        <result_path>/machine_learning_analysis/result/...
    """

    @staticmethod
    def _ml_specification(path: Path) -> dict:
        """Return the specification dict for the ``TrainMLModel`` instruction.

        Args:
            path: directory of the machine learning analysis. The dataset
                location is expressed relative to it (``<path>/../synthetic_dataset``),
                i.e. it relies on the sibling layout produced by :meth:`run`.
        """
        # Location of the AIRR export written by the simulation step; computed
        # once because three import parameters point into it.
        dataset_dir = f"{path}/../synthetic_dataset/result/simulation_instruction/exported_dataset/airr"

        # NOTE: every nested list/dict below is a distinct object on purpose.
        # Reusing one mutable object in several places would make yaml.dump
        # emit anchors/aliases and change the generated file.
        return {
            "definitions": {
                "datasets": {
                    "d1": {
                        "format": "AIRR",
                        "params": {
                            "path": dataset_dir,
                            "dataset_file": f"{dataset_dir}/dataset.yaml",
                            "metadata_file": f"{dataset_dir}/metadata.csv",
                            "import_illegal_characters": True,
                            "import_productive": False,
                            "import_out_of_frame": True,
                        },
                    },
                },
                "encodings": {
                    "e1": {"KmerFrequency": {"k": 3}},
                    "e2": {"KmerFrequency": {"k": 2}},
                },
                "ml_methods": {
                    "simpleLR": {
                        "LogisticRegression": {
                            "C": 0.1,
                            "penalty": "l1",
                            "max_iter": 200,
                        },
                    },
                },
                "reports": {
                    "rep1": {
                        "SequenceLengthDistribution": {
                            "batch_size": 3,
                            "region_type": "IMGT_CDR3",
                            "sequence_type": "amino_acid",
                        },
                    },
                    "hprep": "MLSettingsPerformance",
                    "coef": "Coefficients",
                },
            },
            "instructions": {
                "machine_learning_instruction": {
                    "type": "TrainMLModel",
                    "settings": [
                        {"encoding": "e1", "ml_method": "simpleLR"},
                        {"encoding": "e2", "ml_method": "simpleLR"},
                    ],
                    "assessment": {
                        "split_strategy": "random",
                        "split_count": 1,
                        "training_percentage": 0.7,
                        "reports": {
                            "data_splits": ["rep1"],
                            "models": ["coef"],
                        },
                    },
                    "selection": {
                        "split_strategy": "random",
                        "split_count": 1,
                        "training_percentage": 0.7,
                        "reports": {
                            "data_splits": ["rep1"],
                            "models": [],
                        },
                    },
                    "labels": ["my_signal"],
                    "dataset": "d1",
                    "strategy": "GridSearch",
                    "metrics": ["accuracy"],
                    "reports": ["hprep"],
                    "number_of_processes": 3,
                    "optimization_metric": "balanced_accuracy",
                    "refit_optimal_model": False,
                    "region_type": "IMGT_CDR3",
                    "sequence_type": "amino_acid",
                },
            },
        }

    @staticmethod
    def _simulation_specification() -> dict:
        """Return the specification dict for the ``LigoSim`` simulation instruction.

        Two groups of 50 examples are defined: ``pos_reps`` carrying
        ``my_signal`` (motif seed ``AAA``) and ``neg_reps`` without any signal.
        """
        # The two generative_model dicts are deliberately written out twice
        # (not shared) so that yaml.dump does not emit anchors/aliases.
        return {
            "definitions": {
                "motifs": {"my_motif": {"seed": "AAA"}},
                "signals": {"my_signal": {"motifs": ["my_motif"]}},
                "simulations": {
                    "quickstart_simulation": {
                        "is_repertoire": True,
                        "paired": False,
                        "sequence_type": "amino_acid",
                        "simulation_strategy": "Implanting",
                        "remove_seqs_with_signals": False,
                        "sim_items": {
                            "pos_reps": {
                                "signals": {"my_signal": 0.2},
                                "number_of_examples": 50,
                                "receptors_in_repertoire_count": 10,
                                "generative_model": {
                                    "default_model_name": "humanTRB",
                                    "type": "OLGA",
                                },
                            },
                            "neg_reps": {
                                "signals": {},
                                "number_of_examples": 50,
                                "receptors_in_repertoire_count": 10,
                                "generative_model": {
                                    "default_model_name": "humanTRB",
                                    "type": "OLGA",
                                },
                            },
                        },
                    },
                },
            },
            "instructions": {
                "simulation_instruction": {
                    "type": "LigoSim",
                    "export_p_gens": False,
                    "max_iterations": 100,
                    "sequence_batch_size": 2000,
                    "simulation": "quickstart_simulation",
                },
            },
        }

    @staticmethod
    def _write_specification(specs: dict, specs_file: Path) -> Path:
        """Dump ``specs`` as YAML into ``specs_file`` and return ``specs_file``."""
        with specs_file.open("w") as file:
            yaml.dump(specs, file)
        return specs_file

    def create_specfication(self, path: Path):
        """Write the machine learning specification to ``<path>/specs.yaml``.

        The (misspelled) method name is kept as is because it is part of the
        public interface.

        Args:
            path: directory that will hold ``specs.yaml``; it is passed to
                ``PathBuilder.build`` before the file is written.

        Returns:
            Path of the written ``specs.yaml`` file.
        """
        specs = self._ml_specification(path)
        PathBuilder.build(path)
        return self._write_specification(specs, path / "specs.yaml")

    def build_path(self, path: Optional[Union[str, Path]] = None):
        """Prepare the result directory and return it.

        Args:
            path: target directory. When ``None``, ``<cwd>/quickstart`` is
                used and any existing directory at that location is deleted
                first. An explicitly given directory is never deleted.

        Returns:
            The default ``Path`` when ``path`` was ``None``; otherwise whatever
            ``PathBuilder.build(path)`` returns (used as a ``Path`` by
            :meth:`run`).
        """
        if path is None:
            path = pathlib.Path.cwd() / "quickstart/"
            # Start from a clean slate so results of an earlier quickstart run
            # do not get mixed with the new ones.
            if os.path.isdir(path):
                shutil.rmtree(path)
            # Built unconditionally: run() opens log.txt inside this directory
            # right after this method returns.
            PathBuilder.build(path)
        else:
            path = PathBuilder.build(path)
        return path

    def _simulate_dataset_with_signals(self, path: Path):
        """Write ``simulation_specs.yaml`` under ``path`` and run the simulation.

        Args:
            path: working directory of the simulation step; the application
                output goes to ``<path>/result``.
        """
        print("immuneML quickstart: generating a synthetic dataset...")

        PathBuilder.build(path)
        specs_file = self._write_specification(self._simulation_specification(),
                                               path / "simulation_specs.yaml")

        app = ImmuneMLApp(specs_file, path / "result")
        app.run()

        print("immuneML quickstart: finished generating a synthetic dataset.")

    def run(self, result_path: Optional[Union[str, Path]] = None):
        """Run the complete quickstart: dataset simulation followed by model training.

        Args:
            result_path: directory for all outputs; ``None`` selects the
                default handled by :meth:`build_path`.

        Side effects:
            Configures the root logger to write to ``<result_path>/log.txt``
            and replaces ``warnings.showwarning`` for the whole process.
        """
        result_path = self.build_path(result_path)

        logging.basicConfig(filename=Path(result_path) / "log.txt", level=logging.ERROR,
                            format='%(asctime)s %(levelname)s: %(message)s')

        # Route Python warnings through logging instead of stderr.
        # NOTE(review): with the ERROR threshold configured above, records
        # emitted via logging.warning are filtered out (unless logging had
        # already been configured differently, in which case basicConfig is a
        # no-op) -- confirm that silencing warnings is intended.
        def _log_warning(message, category, filename, lineno, file=None, line=None):
            return logging.warning(message)

        warnings.showwarning = _log_warning

        # Step 1: the ML specification refers to this directory via
        # "<ml_dir>/../synthetic_dataset", so the two names must stay in sync.
        self._simulate_dataset_with_signals(result_path / "synthetic_dataset")

        # Step 2: train and assess the models on the simulated dataset.
        print("immuneML quickstart: training a machine learning model...")

        ml_dir = result_path / "machine_learning_analysis"
        specs_file = self.create_specfication(ml_dir)
        app = ImmuneMLApp(specs_file, ml_dir / "result")
        app.run()

        print("immuneML quickstart: finished training a machine learning model.")
[docs]
def main():
    """Command-line entry point of the quickstart.

    Exactly one command-line argument is interpreted as the result path; with
    any other number of arguments ``None`` is passed on, which makes
    ``Quickstart.run`` fall back to its default location.
    """
    cli_args = sys.argv[1:]
    if len(cli_args) == 1:
        result_path = cli_args[0]
    else:
        result_path = None

    Quickstart().run(result_path)


# Execute the quickstart only when the module is run as a script.
if __name__ == "__main__":
    main()