# Source code for immuneML.IO.dataset_export.PickleExporter

# quality: gold

import copy
import platform
import os
import pickle
import shutil
from pathlib import Path
from typing import List

import pandas as pd

from immuneML.IO.dataset_export.DataExporter import DataExporter
from immuneML.data_model.dataset.Dataset import Dataset
from immuneML.data_model.dataset.ReceptorDataset import ReceptorDataset
from immuneML.data_model.dataset.RepertoireDataset import RepertoireDataset
from immuneML.data_model.dataset.SequenceDataset import SequenceDataset
from immuneML.data_model.repertoire.Repertoire import Repertoire
from immuneML.environment.Constants import Constants
from immuneML.util.PathBuilder import PathBuilder


class PickleExporter(DataExporter):
    """
    Exports a dataset as a pickled ``.iml_dataset`` file together with the files the dataset refers to.

    For a RepertoireDataset the repertoire files are copied into a ``repertoires`` subfolder and the metadata
    file is copied next to the pickled dataset; for a SequenceDataset or ReceptorDataset the dataset files are
    copied directly into the export folder. An ``info.txt`` file recording the immuneML version, the Python
    version and the pickle protocol is written alongside.
    """

    @staticmethod
    def export(dataset: Dataset, path: Path):
        """
        Export a deep copy of the dataset to the given folder.

        Args:
            dataset: the dataset to export; it is not modified, all changes are made on a deep copy
            path: the folder where the pickled dataset and the copied files are stored

        Returns:
            the deep copy of the dataset whose file references point to the exported files
        """
        PathBuilder.build_from_objects(path)

        exported_dataset = copy.deepcopy(dataset)
        dataset_filename = f"{PickleExporter._get_dataset_name(exported_dataset)}.iml_dataset"

        if isinstance(dataset, RepertoireDataset):
            repertoires_path = PathBuilder.build_from_objects(path / "repertoires")
            exported_dataset.repertoires = PickleExporter._export_repertoires(dataset.repertoires, repertoires_path)
            exported_dataset.metadata_file = PickleExporter._export_metadata(dataset, path, dataset_filename, repertoires_path)
        elif isinstance(dataset, (SequenceDataset, ReceptorDataset)):
            exported_dataset.set_filenames(PickleExporter._export_receptors(exported_dataset.get_filenames(), path))

        file_path = path / dataset_filename
        with file_path.open("wb") as file:
            pickle.dump(exported_dataset, file, pickle.HIGHEST_PROTOCOL)

        version_path = path / "info.txt"
        with version_path.open("w") as file:
            file.write(f"immuneML version: {Constants.VERSION}\n"
                       f"Python version: {platform.python_version()}\n"
                       f"Pickle protocol: {pickle.HIGHEST_PROTOCOL}")

        return exported_dataset

    @staticmethod
    def _get_dataset_name(dataset) -> str:
        """Return the dataset name, or the dataset identifier if no name is set."""
        return dataset.name if dataset.name is not None else dataset.identifier

    @staticmethod
    def _export_metadata(dataset, metadata_folder_path: Path, dataset_filename, repertoires_path):
        """
        Copy the metadata file of the dataset to the export folder and update it to describe the exported dataset.

        Returns:
            the path to the exported metadata file, or None if the dataset has no existing metadata file
        """
        if dataset.metadata_file is None or not dataset.metadata_file.is_file():
            return None

        # use the same name fallback as for the pickled dataset file so that an unnamed dataset
        # does not end up with a metadata file called "None_metadata.csv"
        metadata_file = metadata_folder_path / f"{PickleExporter._get_dataset_name(dataset)}_metadata.csv"

        if not metadata_file.is_file():
            shutil.copyfile(dataset.metadata_file, metadata_file)

        PickleExporter._update_repertoire_paths_in_metadata(metadata_file, repertoires_path)
        PickleExporter._add_dataset_to_metadata(metadata_file, dataset_filename)

        # remove a generically named metadata file from the export folder, if there is one
        old_metadata_file = metadata_folder_path / "metadata.csv"
        if old_metadata_file.is_file():
            os.remove(old_metadata_file)

        return metadata_file

    @staticmethod
    def _update_repertoire_paths_in_metadata(metadata_file: Path, repertoires_path: Path):
        """Rewrite the "filename" column so that it points into repertoires_path, relative to the metadata file."""
        metadata = pd.read_csv(metadata_file, comment=Constants.COMMENT_SIGN)
        path = Path(os.path.relpath(repertoires_path, os.path.dirname(metadata_file)))
        metadata["filename"] = [path / os.path.basename(name) for name in metadata["filename"].values.tolist()]
        metadata.to_csv(metadata_file, index=False)

    @staticmethod
    def _add_dataset_to_metadata(metadata_file: Path, dataset_filename: str):
        """Prepend a comment line with the name of the pickled dataset file to the metadata file."""
        # skip comment lines when reading so that an already present header is not parsed as the column row
        metadata = pd.read_csv(metadata_file, comment=Constants.COMMENT_SIGN)

        with metadata_file.open("w") as file:
            file.write(f"{Constants.COMMENT_SIGN}{dataset_filename}\n")

        # append only after the handle above is closed, so the comment line is on disk before the table
        metadata.to_csv(metadata_file, mode="a", index=False)

    @staticmethod
    def _export_receptors(filenames_old: List[Path], path: Path) -> List[Path]:
        """Copy the dataset files to the export folder and return their new paths in the same order."""
        return [PickleExporter._copy_if_exists(filename_old, path) for filename_old in filenames_old]

    @staticmethod
    def _export_repertoires(repertoires: List[Repertoire], repertoires_path: Path) -> List[Repertoire]:
        """Return deep copies of the repertoires whose data and metadata files are copied to repertoires_path."""
        new_repertoires = []

        for repertoire_old in repertoires:
            repertoire = copy.deepcopy(repertoire_old)
            repertoire.data_filename = PickleExporter._copy_if_exists(repertoire_old.data_filename, repertoires_path)
            repertoire.metadata_filename = PickleExporter._copy_if_exists(repertoire_old.metadata_filename, repertoires_path)
            new_repertoires.append(repertoire)

        return new_repertoires

    @staticmethod
    def _copy_if_exists(old_file: Path, path: Path):
        """
        Copy old_file into the folder path unless a file with the same name is already there.

        Returns:
            the path of the file in the target folder

        Raises:
            RuntimeError: if old_file is None or is not an existing file
        """
        if old_file is None or not old_file.is_file():
            raise RuntimeError(f"{PickleExporter.__name__}: tried exporting file {old_file}, but it does not exist.")

        new_file = path / old_file.name
        if not new_file.is_file():
            shutil.copyfile(old_file, new_file)
        return new_file