class OneHotSequenceEncoder(OneHotEncoder):
    """
    One-hot encoder for sequence datasets.

    One-hot encoded data is represented as a matrix with dimensions:
        [sequences, sequence_lengths, one_hot_characters]

    When use_positional_info is true, the last 3 indices in
    one_hot_characters represent the positional information:
        - start position (high when close to start)
        - middle position (high in the middle of the sequence)
        - end position (high when close to end)
    """

    def _encode_new_dataset(self, dataset: SequenceDataset, params: EncoderParams):
        """Return a clone of ``dataset`` with its ``encoded_data`` attribute set.

        The encoding is computed from the original dataset before cloning; the
        clone, not the input dataset, receives the result.
        """
        encoding_result = self._encode_data(dataset, params)

        dataset_copy = dataset.clone()
        dataset_copy.encoded_data = encoding_result
        return dataset_copy

    def _encode_data(self, dataset: SequenceDataset, params: EncoderParams):
        """Build the ``EncodedData`` object for ``dataset``.

        The longest sequence length in the dataset is passed on as the padding
        length for the encoded examples. Labels are collected only when
        ``params.encode_labels`` is truthy; otherwise ``labels`` is ``None``.
        When ``self.flatten`` is set, the examples are reshaped to two
        dimensions and the per-position feature names are flattened into a
        single list in the same position-major order.
        """
        sequences = dataset.data
        sequence_field = getattr(sequences, params.get_sequence_field_name())
        longest = max(sequence_field.lengths)

        if params.encode_labels:
            label_values = self._get_labels(sequences, params)
        else:
            label_values = None

        encoded_examples = self._encode_sequence_list(
            sequences,
            pad_n_sequences=len(sequences),
            pad_sequence_len=longest,
            params=params,
        )
        names = self._get_feature_names(longest)

        if self.flatten:
            # Collapse the (position, dimension) axes into one feature axis.
            flat_width = longest * len(self.onehot_dimensions)
            encoded_examples = encoded_examples.reshape((len(sequences), flat_width))

            # Flatten the names position by position so they stay aligned
            # with the reshaped columns.
            flat_names = []
            for position_names in names:
                flat_names.extend(position_names)
            names = flat_names

        return EncodedData(
            examples=encoded_examples,
            labels=label_values,
            example_ids=dataset.get_example_ids(),
            feature_names=names,
            encoding=OneHotEncoder.__name__,
        )

    def _get_feature_names(self, max_seq_len):
        """Return one list of ``"<position>_<dimension>"`` names per position.

        The outer list has ``max_seq_len`` entries; each inner list has one
        name per entry of ``self.onehot_dimensions``.
        """
        names_per_position = []
        for pos in range(max_seq_len):
            position_names = []
            for dim in self.onehot_dimensions:
                position_names.append(f"{pos}_{dim}")
            names_per_position.append(position_names)
        return names_per_position

    def _get_labels(self, data: AIRRSequenceSet, params: EncoderParams):
        """Return a dict mapping each configured label name to its values.

        Label names come from ``params.label_config.get_labels_by_name()``;
        the values are read from the same-named attribute of ``data`` and
        converted with ``tolist()``.
        """
        values_by_label = {}
        for label_name in params.label_config.get_labels_by_name():
            values_by_label[label_name] = getattr(data, label_name).tolist()
        return values_by_label