def create_model(self, dataset: RepertoireDataset, k: int, vector_size: int, batch_size: int, model_path: Path,
                 sequence_type: SequenceType):
    """
    Build, train and persist a Word2Vec model over all k-mers of the sequence alphabet.

    The vocabulary consists of every k-mer returned by ``KmerHelper.create_all_kmers``
    for the alphabet reported by ``EnvironmentSettings.get_sequence_alphabet()``.
    The model is then trained once per k-mer on whatever
    ``KmerHelper.create_kmers_within_HD(..., distance=1)`` returns for that k-mer
    (presumably the k-mers within Hamming distance 1 -- confirm against KmerHelper).

    Args:
        dataset: not used by this implementation.
        k: k-mer length passed to ``KmerHelper.create_all_kmers``.
        vector_size: forwarded to ``Word2Vec`` as ``vector_size``.
        batch_size: not used by this implementation.
        model_path: location the trained model is saved to (converted with ``str``).
        sequence_type: not used by this implementation.

    Returns:
        The trained ``Word2Vec`` model, after it has been saved to ``model_path``.
    """
    from gensim.models import Word2Vec

    # Start from an empty model; its vocabulary is registered explicitly below.
    word2vec = Word2Vec(vector_size=vector_size, min_count=1, window=self.window)

    alphabet = EnvironmentSettings.get_sequence_alphabet()
    # Each k-mer is wrapped in its own single-element list before being handed to build_vocab.
    vocabulary = [[kmer] for kmer in KmerHelper.create_all_kmers(k=k, alphabet=alphabet)]
    word2vec.build_vocab(vocabulary)
    kmer_count = len(vocabulary)

    for (kmer,) in vocabulary:
        # NOTE(review): the alphabet is looked up again on every iteration; it looks
        # loop-invariant -- confirm get_sequence_alphabet() is pure before hoisting.
        related_kmers = KmerHelper.create_kmers_within_HD(
            kmer=kmer,
            alphabet=EnvironmentSettings.get_sequence_alphabet(),
            distance=1,
        )
        word2vec.train(corpus_iterable=related_kmers, total_words=kmer_count, epochs=word2vec.epochs)

    word2vec.save(str(model_path))
    return word2vec