@staticmethod
def export_zip(cl_item: ClusteringItem, path: Path, setting_key: str) -> Path:
    """
    Package a fitted clustering setting (encoder, optional dimensionality reduction,
    clustering method) into a single zip archive.

    The components are first written to the ``fitted`` subdirectory of ``path`` via
    ``ClusteringExporter.export``; the directory returned by that call is then archived.

    Args:
        cl_item: The ClusteringItem holding the fitted encoder and clustering method
        path: The directory under which the exported files and the archive are created
        setting_key: Identifier of the setting; used as the archive's base file name

    Returns:
        Absolute path to the created zip file

    """
    base_dir = path.absolute()
    fitted_dir = ClusteringExporter.export(cl_item, base_dir / "fitted")

    # make_archive appends the ".zip" suffix to base_name and returns the archive's file name
    archive_file = shutil.make_archive(
        base_name=str(base_dir / setting_key),
        format="zip",
        root_dir=str(fitted_dir),
    )
    return Path(archive_file).absolute()
@staticmethod
def export(cl_item: ClusteringItem, path: Path) -> Path:
    """
    Write the components of a clustering item to a directory.

    The encoder and the clustering method are always stored; the classifier and the
    dimensionality reduction method are stored only when they are set on ``cl_item``.
    A ``clustering_config.yaml`` file describing the stored files is written alongside them.

    Args:
        cl_item: The ClusteringItem to export
        path: The directory where the files will be stored

    Returns:
        Path to the export directory (the ``path`` argument)

    """
    PathBuilder.build(path)

    # Mandatory components: only the bare file names are recorded in the configuration
    encoder_name = ClusteringExporter._store_encoder(cl_item.encoder, path).name
    method_name = ClusteringExporter._store_clustering_method(cl_item.method, path).name

    # Optional: classifier (sklearn object for result-based validation)
    classifier_name = (
        ClusteringExporter._store_classifier(cl_item.classifier, path).name
        if cl_item.classifier is not None
        else None
    )

    # Optional: the fitted dimensionality reduction method taken from cl_item
    dim_red_name = (
        ClusteringExporter._store_dim_reduction(cl_item.dim_red_method, path).name
        if cl_item.dim_red_method is not None
        else None
    )

    config = ClusteringExporter._create_config(
        cl_item,
        encoder_filename=encoder_name,
        method_filename=method_name,
        dim_red_filename=dim_red_name,
        classifier_filename=classifier_name,
    )

    with (path / 'clustering_config.yaml').open('w') as config_stream:
        yaml.dump(config, config_stream)

    return path
@staticmethod
def _store_encoder(encoder: DatasetEncoder, path: Path) -> Path:
    """
    Store the encoder as ``encoder.pickle`` inside ``path``.

    Serialization is delegated to the ``store_encoder`` function of the encoder's own class.

    Args:
        encoder: The encoder to store
        path: The directory in which the file is created

    Returns:
        Path to the encoder file

    """
    filename = path / "encoder.pickle"
    type(encoder).store_encoder(encoder, filename)
    return filename


@staticmethod
def _store_clustering_method(method, path: Path) -> Path:
    """
    Pickle the clustering method to ``clustering_method.pickle`` inside ``path``.

    Args:
        method: The clustering method object to store
        path: The directory in which the file is created

    Returns:
        Path to the written pickle file

    """
    filename = path / "clustering_method.pickle"
    with filename.open("wb") as file:
        pickle.dump(method, file)
    return filename


@staticmethod
def _store_dim_reduction(dim_red_method, path: Path) -> Path:
    """
    Pickle the dimensionality reduction method to ``dim_reduction.pickle`` inside ``path``.

    Args:
        dim_red_method: The dimensionality reduction object to store
        path: The directory in which the file is created

    Returns:
        Path to the written pickle file

    """
    filename = path / "dim_reduction.pickle"
    with filename.open("wb") as file:
        pickle.dump(dim_red_method, file)
    return filename


@staticmethod
def _store_classifier(classifier, path: Path) -> Path:
    """
    Store the sklearn classifier used for result-based validation as
    ``classifier.pickle`` inside ``path``.

    Args:
        classifier: The classifier object to store
        path: The directory in which the file is created

    Returns:
        Path to the written pickle file

    """
    filename = path / "classifier.pickle"
    with filename.open("wb") as file:
        pickle.dump(classifier, file)
    return filename


@staticmethod
def _create_config(cl_item: ClusteringItem, encoder_filename: str, method_filename: str,
                   dim_red_filename: str = None, classifier_filename: str = None) -> dict:
    """
    Create a configuration dictionary for the clustering export.

    Args:
        cl_item: The ClusteringItem being exported
        encoder_filename: File name of the stored encoder
        method_filename: File name of the stored clustering method
        dim_red_filename: File name of the stored dimensionality reduction method, if any
        classifier_filename: File name of the stored classifier, if any

    Returns:
        Dictionary describing the stored files. Setting-related keys are added only when
        ``cl_item.cl_setting`` is set, dimensionality reduction keys only when
        ``cl_item.dim_red_method`` is not None, and dataset keys only when
        ``cl_item.dataset`` is set.

    """
    config = {
        'encoding_file': encoder_filename,
        'encoding_class': type(cl_item.encoder).__name__ if cl_item.encoder else None,
        'clustering_method_file': method_filename,
        'clustering_method_class': type(cl_item.method).__name__ if cl_item.method else None,
        'clustering_method_name': cl_item.method.name if cl_item.method else None,
        'classifier_filename': classifier_filename
    }

    cl_setting = cl_item.cl_setting

    if cl_setting:
        config.update({
            'setting_key': cl_setting.get_key(),
            'encoder_name': cl_setting.encoder_name,
            'encoder_params': cl_setting.encoder_params,
            'clustering_params': cl_setting.clustering_params,
        })

    if cl_item.dim_red_method is not None:
        # The setting is optional (see the guard above), so its attributes must not be read
        # unconditionally here: without a setting, the name and params are recorded as None
        # instead of raising AttributeError.
        config.update({
            'dim_reduction_file': dim_red_filename,
            'dim_reduction_class': type(cl_item.dim_red_method).__name__,
            'dim_reduction_name': cl_setting.dim_red_name if cl_setting else None,
            'dim_reduction_params': cl_setting.dim_red_params if cl_setting else None,
        })

    if cl_item.dataset:
        config.update({
            'discovery_dataset_id': cl_item.dataset.identifier,
            'discovery_dataset_name': cl_item.dataset.name,
        })

    return config