Source code for immuneML.encodings.kmer_frequency.KmerFreqSequenceEncoder

from collections import Counter

from immuneML.encodings.EncoderParams import EncoderParams
from immuneML.encodings.kmer_frequency.KmerFrequencyEncoder import KmerFrequencyEncoder


class KmerFreqSequenceEncoder(KmerFrequencyEncoder):
    """Specialisation of KmerFrequencyEncoder that yields one encoded example per sequence.

    Every item produced by ``dataset.get_data()`` is encoded on its own and
    contributes exactly one entry to each of the returned per-example lists.
    """

    def _encode_new_dataset(self, dataset, params: EncoderParams):
        """Encode ``dataset`` and return a clone that carries the encoded data.

        The encoding is computed through ``self._encode_data`` before the
        dataset is cloned; the result is attached to the clone's
        ``encoded_data`` attribute rather than to the object passed in.
        """
        encoded = self._encode_data(dataset, params)
        dataset_copy = dataset.clone()
        dataset_copy.encoded_data = encoded
        return dataset_copy

    def _encode_examples(self, dataset, params: EncoderParams):
        """Encode every sequence of ``dataset`` and collect ids and labels.

        Returns a 4-tuple ``(encoded, ids, labels, feature_names)``:

        * ``encoded`` - list with the result of ``self._encode_sequence`` for
          each sequence, each call being handed a fresh, empty ``Counter``;
        * ``ids`` - list with the ``sequence_id`` of each sequence, in the
          same order as ``encoded``;
        * ``labels`` - ``None`` when ``params.encode_labels`` is falsy,
          otherwise a dict mapping each name from
          ``params.label_config.get_labels_by_name()`` to the list of values
          read from ``sequence.metadata[name]``, one per sequence;
        * ``feature_names`` - whatever the prepared sequence encoder reports
          through ``get_feature_names(params)``.
        """
        all_counts, all_ids = [], []
        label_config = params.label_config

        # The label configuration is only queried when labels are requested.
        if params.encode_labels:
            labels = {name: [] for name in label_config.get_labels_by_name()}
        else:
            labels = None

        seq_encoder = self._prepare_sequence_encoder()
        feature_names = seq_encoder.get_feature_names(params)

        for seq in dataset.get_data():
            all_counts.append(self._encode_sequence(seq, params, seq_encoder, Counter()))
            all_ids.append(seq.sequence_id)

            if not params.encode_labels:
                continue

            # NOTE(review): the label names are re-queried for every sequence;
            # presumably they are loop-invariant and could be hoisted - confirm.
            for name in label_config.get_labels_by_name():
                value = seq.metadata[name]
                labels[name].append(value)

        return all_counts, all_ids, labels, feature_names