# Source code for immuneML.encodings.kmer_frequency.KmerFreqReceptorEncoder

from collections import Counter

from immuneML.data_model.SequenceParams import Chain
from immuneML.data_model.datasets.ElementDataset import ReceptorDataset
from immuneML.encodings.EncoderParams import EncoderParams
from immuneML.encodings.kmer_frequency.KmerFrequencyEncoder import KmerFrequencyEncoder


class KmerFreqReceptorEncoder(KmerFrequencyEncoder):
    """K-mer frequency encoding for receptor datasets.

    Every receptor is encoded chain by chain via the inherited
    ``_encode_sequence`` and the counts of its first two chains are added
    together to give a single count collection per receptor.
    """

    def _encode_locus(self, dataset):
        """Return ``True`` unconditionally; ``dataset`` is not inspected.

        The result is forwarded as the last argument of ``_encode_sequence``
        by ``_encode_examples``.
        """
        return True

    def _encode_new_dataset(self, dataset, params: EncoderParams):
        """Encode ``dataset`` and return a clone carrying the encoded data.

        The encoding is computed on the original dataset first; only then is
        the dataset cloned and the result attached as ``encoded_data``.
        """
        features = self._encode_data(dataset, params)
        result = dataset.clone()
        result.encoded_data = features
        return result

    def _encode_examples(self, dataset: ReceptorDataset, params: EncoderParams):
        """Collect per-receptor k-mer counts, receptor ids and label values.

        Side effect: ``params.region_type`` is overwritten with
        ``self.region_type`` before any sequence is encoded.

        Returns:
            A 3-tuple ``(counts, receptor_ids, labels)`` where

            * ``counts`` has one entry per receptor: the sum of the values
              produced for its first two chains (presumably ``Counter``
              objects returned by ``_encode_sequence`` - verify against the
              parent class),
            * ``receptor_ids`` lists ``receptor.receptor_id`` in iteration
              order,
            * ``labels`` maps each label name to the list of values read from
              ``receptor.metadata``, or is ``None`` when
              ``params.encode_labels`` is falsy.
        """
        label_config = params.label_config
        if params.encode_labels:
            labels = {name: [] for name in label_config.get_labels_by_name()}
        else:
            labels = None

        params.region_type = self.region_type
        with_locus = self._encode_locus(dataset)
        kmer_encoder = self._prepare_sequence_encoder()

        receptor_counts = []
        receptor_ids = []

        for receptor in dataset.get_data(region_type=self.region_type):
            # Lower-cased chain names double as the attribute names under
            # which the receptor exposes its chains.
            chain_names = [
                Chain.get_chain(member).name.lower()
                for member in receptor.chain_pair.value
            ]

            per_chain = {name: Counter() for name in chain_names}
            for name in chain_names:
                per_chain[name] = self._encode_sequence(
                    getattr(receptor, name),
                    params,
                    kmer_encoder,
                    per_chain[name],
                    with_locus,
                )

            first_chain, second_chain = chain_names[0], chain_names[1]
            receptor_counts.append(per_chain[first_chain] + per_chain[second_chain])
            receptor_ids.append(receptor.receptor_id)

            if not params.encode_labels:
                continue
            for label_name in label_config.get_labels_by_name():
                labels[label_name].append(receptor.metadata[label_name])

        return receptor_counts, receptor_ids, labels