Toggle Light / Dark / Auto color theme
Toggle table of contents sidebar
Source code for immuneML.workflows.steps.DataSampler
import random
from dataclasses import dataclass
from pathlib import Path
from typing import List
from immuneML.data_model.datasets.Dataset import Dataset
from immuneML.hyperparameter_optimization.config.SampleConfig import SampleConfig
from immuneML.workflows.steps.Step import Step
from immuneML.workflows.steps.StepParams import StepParams
[docs]
@dataclass
class DataSamplerParams ( StepParams ):
"""Parameters for DataSampler step."""
dataset : Dataset
split_config : SampleConfig
paths : List [ Path ] = None
[docs]
class DataSampler ( Step ):
"""DataSampler step that samples data from datasets based on specified strategies. Currently, only random
subsampling without replacement is supported."""
[docs]
@staticmethod
def run ( input_params : DataSamplerParams = None ):
subsampled_datasets = []
if input_params . split_config . random_seed :
random . seed ( input_params . split_config . random_seed )
for i in range ( input_params . split_config . split_count ):
indices = list ( range ( input_params . dataset . get_example_count ()))
sampled_indices = random . sample ( indices , k = round ( len ( indices ) * input_params . split_config . percentage ))
sampled_dataset = input_params . dataset . make_subset ( sampled_indices , input_params . paths [ i ], Dataset . SUBSAMPLED )
subsampled_datasets . append ( sampled_dataset )
return subsampled_datasets