# Source code for immuneML.encodings.kmer_frequency.KmerFreqSequenceEncoder
from immuneML.data_model.datasets.ElementDataset import SequenceDataset
from immuneML.encodings.EncoderParams import EncoderParams
from immuneML.encodings.kmer_frequency.BNPSequenceEncodingStrategies import (
dispatch_encoding, get_v_genes, kmer_weights, seq_field, V_GENE_ENCODING_TYPES,
)
from immuneML.encodings.kmer_frequency.KmerFrequencyEncoder import KmerFrequencyEncoder
from immuneML.util.EncoderHelper import EncoderHelper
# [docs]
class KmerFreqSequenceEncoder(KmerFrequencyEncoder):
    # Specialisation of KmerFrequencyEncoder whose hooks read per-sequence
    # columns (locus, sequence_id, and the field chosen by seq_field) from a
    # SequenceDataset's ``data`` attribute.

    def _encode_locus(self, dataset):
        """Tell whether the dataset's ``locus`` column has more than one distinct value.

        Args:
            dataset: object exposing ``data.locus`` with a ``tolist()`` method.

        Returns:
            bool: True if at least two different locus values occur, else False.
        """
        distinct_loci = set(dataset.data.locus.tolist())
        return len(distinct_loci) > 1

    def _encode_new_dataset(self, dataset, params: EncoderParams):
        """Encode ``dataset`` and return a clone carrying the encoded data.

        Args:
            dataset: the dataset to encode; it is passed to ``self._encode_data``
                and then cloned.
            params: encoder parameters forwarded to ``self._encode_data``.

        Returns:
            The clone of ``dataset`` with its ``encoded_data`` attribute set to
            the result of ``self._encode_data``.
        """
        # Encoding runs before cloning, so the clone is taken from the dataset
        # as it is after ``_encode_data`` has finished.
        result = self._encode_data(dataset, params)
        clone = dataset.clone()
        clone.encoded_data = result
        return clone

    def _encode_examples(self, dataset: SequenceDataset, params: EncoderParams):
        """Gather the k-mer encoding pieces for every sequence in ``dataset``.

        Args:
            dataset: sequence dataset whose ``data`` attribute provides the
                sequence field, ``locus`` and ``sequence_id`` columns.
            params: encoder parameters; ``encode_labels`` decides whether labels
                are produced and ``label_config`` is used to produce them.

        Returns:
            A 5-tuple of: the two values returned by ``dispatch_encoding``
            (k-mers and row ids), the output of ``kmer_weights`` for those row
            ids, the list of sequence ids, and the encoded labels (``None`` when
            ``params.encode_labels`` is falsy).
        """
        table = dataset.data
        field_name = seq_field(self.region_type, self.sequence_type)
        sequences = getattr(table, field_name)

        # Locus values are only handed to the encoding step when the dataset
        # contains more than one distinct locus.
        if self._encode_locus(dataset):
            loci = table.locus.tolist()
        else:
            loci = None

        # V genes are only fetched for the encoding types listed in
        # V_GENE_ENCODING_TYPES.
        if self.sequence_encoding in V_GENE_ENCODING_TYPES:
            v_gene_values = get_v_genes(table)
        else:
            v_gene_values = None

        kmers, rows = dispatch_encoding(
            sequences,
            self.sequence_encoding,
            self.k,
            self.k_left,
            self.k_right,
            self.min_gap,
            self.max_gap,
            self.region_type,
            v_gene_values,
            loci,
        )

        if params.encode_labels:
            encoded_labels = EncoderHelper.encode_element_dataset_labels(dataset, params.label_config)
        else:
            encoded_labels = None

        weights = kmer_weights(table, self.reads, rows)
        sequence_ids = table.sequence_id.tolist()
        return kmers, rows, weights, sequence_ids, encoded_labels