class ShannonDiversityEncoder(DatasetEncoder):
    """
    ShannonDiversity encoder calculates the Shannon diversity index for each repertoire in a dataset. The diversity is
    computed as:

    .. math::

        diversity = - \\sum_{i=1}^{n} p_i \\log(p_i)

    where :math:`p_i` is the clonal count for each unique sequence in the repertoire (from duplicate_count field)
    divided by the total clonal counts, and :math:`n` is the total number of clonotypes (sequences) in the repertoire.

    **Dataset type:**

    - RepertoireDataset

    **Specification arguments:**

    No arguments are needed for this encoder.

    **YAML specification:**

    .. indent with spaces
    .. code-block:: yaml

        definitions:
            encodings:
                shannon_div_enc: ShannonDiversity

    """

    def __init__(self, name: str = None):
        """Create the encoder; ``name`` is forwarded unchanged to the base-class constructor."""
        super().__init__(name=name)

    @staticmethod
    def _require_repertoire_dataset(dataset) -> None:
        """
        Raise ``AssertionError`` unless ``dataset`` is a ``RepertoireDataset``.

        An explicit ``raise`` is used instead of an ``assert`` statement so the check is not
        compiled away when Python runs with ``-O``. The exception type and message are the
        same as the ``assert`` produced, so existing callers observe no difference.
        """
        if not isinstance(dataset, RepertoireDataset):
            raise AssertionError(f"{ShannonDiversityEncoder.__name__}: Dataset must be of type RepertoireDataset, but got {type(dataset)}.")

    @staticmethod
    def build_object(dataset: Dataset, **params):
        """
        Validate the dataset type and construct the encoder.

        Args:
            dataset: the dataset the encoder is built for; must be a ``RepertoireDataset``.
            **params: keyword arguments passed straight to the ``ShannonDiversityEncoder`` constructor.

        Returns:
            A new ``ShannonDiversityEncoder`` instance.

        Raises:
            AssertionError: if ``dataset`` is not a ``RepertoireDataset``.
        """
        ShannonDiversityEncoder._require_repertoire_dataset(dataset)
        return ShannonDiversityEncoder(**params)

    def encode(self, dataset, params: EncoderParams) -> Dataset:
        """
        Encode ``dataset`` and return a clone of it carrying the encoded data.

        The examples are produced by ``self._encode(dataset, params)`` (defined outside this
        excerpt), invoked through ``CacheHandler.memo_by_params``.

        Args:
            dataset: the dataset to encode; must be a ``RepertoireDataset``.
            params: encoder parameters; ``encode_labels`` and ``label_config`` are read here.

        Returns:
            ``dataset.clone()`` with ``encoded_data`` set to an ``EncodedData`` holding the examples,
            the labels (an empty dict when ``params.encode_labels`` is false), the example ids and
            this encoder's class name as the encoding.

        Raises:
            AssertionError: if ``dataset`` is not a ``RepertoireDataset``.
        """
        ShannonDiversityEncoder._require_repertoire_dataset(dataset)

        # Key handed to CacheHandler: dataset identifier, encoder name, and the label names
        # (an empty string stands in for the label names when labels are not encoded).
        cache_key = (dataset.identifier,
                     ShannonDiversityEncoder.__name__,
                     params.label_config.get_labels_by_name() if params.encode_labels else '')
        examples = CacheHandler.memo_by_params(cache_key, lambda: self._encode(dataset, params))

        # The input dataset is left untouched; the encoded data is attached to a clone.
        encoded_dataset = dataset.clone()
        encoded_dataset.encoded_data = EncodedData(
            examples=examples,
            labels=dataset.get_metadata(params.label_config.get_labels_by_name()) if params.encode_labels else {},
            example_ids=dataset.get_example_ids(),
            encoding=ShannonDiversityEncoder.__name__)
        return encoded_dataset