class CompositeEncoder(DatasetEncoder):
    """
    This encoder allows combining multiple different encodings, for example, the KmerFrequency encoder with
    VGeneEncoder. The parameters for the different encoders are passed as a list of dictionaries, where each
    dictionary contains the parameters for one encoder. The different encoders are applied sequentially and their
    results are concatenated together.

    **Dataset type:**

    - SequenceDatasets

    - ReceptorDatasets

    - RepertoireDatasets

    .. note::

        To combine multiple encodings (e.g., GeneFrequency and KmerFrequency), keep in mind how the ML method will
        use the encoded data downstream. Currently, the recommended way to use CompositeEncoder is with
        :ref:`LogRegressionCustomPenalty`, where you can specify which features should not be penalized.

    **Specification arguments:**

    - encoders (list): A list of dictionaries, where each dictionary contains the parameters for one encoder.

    **YAML specification:**

    .. code-block:: yaml

        encodings:
            my_composite_encoding:
                Composite:
                    encoders:
                        - KmerFrequency:
                            k: 3
                        - GeneFrequency:
                            genes: [V]
                            normalization_type: relative_frequency
                            scale_to_unit_variance: true
                            scale_to_zero_mean: true

    """

    def __init__(self, encoders: List[DatasetEncoder], name: str = None):
        """
        Store the already constructed sub-encoders.

        Args:
            encoders: encoder instances that make up this composite encoder, in the order they were specified.
            name: name of this encoder, forwarded to the parent constructor.
        """
        super().__init__(name=name)
        self.encoders = encoders

    @staticmethod
    def build_object(dataset: Dataset, **params):
        """
        Build a CompositeEncoder from its specification parameters.

        Each entry of ``params['encoders']`` is a dictionary whose first key is the encoder name without the
        ``Encoder`` suffix and whose value holds the parameters for that encoder. If an entry has more than one
        key, only the first one is used. For every entry, the encoder class is looked up by name, its default
        parameters are loaded and overridden by the user-provided ones, and the encoder is built through its own
        ``build_object``. Each built encoder is renamed to ``<name>_step_<i>_<encoder name or class name>`` with
        ``i`` starting at 1.

        Args:
            dataset: the dataset passed on unchanged to ``build_object`` of every sub-encoder.
            **params: must contain ``encoders`` (list of dicts); may contain ``name`` (defaults to ``'composite'``).

        Returns:
            A CompositeEncoder holding the built sub-encoders in specification order.

        Raises:
            AssertionError: if ``encoders`` is missing or one of its entries is an empty dictionary.
        """
        # Raised explicitly (instead of a bare `assert`) so that the check also runs under `python -O`;
        # the exception type and message are unchanged.
        if 'encoders' not in params:
            raise AssertionError("Parameter 'encoders' must be provided for CompositeEncoder.")
        ParameterValidator.assert_all_type_and_value(params['encoders'], dict, "CompositeEncoder", 'encoders')

        name = params.get('name', 'composite')
        encoders = []

        for step, encoder_specs in enumerate(params['encoders'], start=1):
            if not encoder_specs:
                raise AssertionError(f"CompositeEncoder: entry {step} of 'encoders' is empty; expected a dictionary "
                                     f"with the encoder name as its key.")

            # Only the first key is considered, matching the previous behaviour.
            encoder_key = next(iter(encoder_specs))
            cls_name = encoder_key + 'Encoder'
            encoder_cls = ReflectionHandler.get_class_by_name(cls_name, 'encodings')

            # Defaults are looked up under the key itself, i.e. the class name without the suffix appended above.
            default_params = DefaultParamsLoader.load('encodings', encoder_key)

            # An entry without parameters (e.g. "- KmerFrequency:" in YAML) is parsed as None; fall back to
            # an empty mapping so that only the defaults are used instead of failing on `**None`.
            user_params = encoder_specs[encoder_key] or {}

            # User-provided parameters take precedence over the defaults.
            encoder_instance = encoder_cls.build_object(dataset, **{**default_params, **user_params})
            encoder_instance.name = f"{name}_step_{step}_{encoder_instance.name or cls_name}"
            encoders.append(encoder_instance)

        return CompositeEncoder(encoders=encoders, name=name)