class LogRegressionCustomPenalty(MLMethod):
    """
    Logistic Regression with custom penalty factors for specific features.

    **Specification arguments**:

    - non_penalized_features (list): List of feature names that should not be penalized.

    - non_penalized_encodings (list): List of encoding names (class names) whose features should not be penalized.
      This parameter can be used only in combination with CompositeEncoder. None of the features from the specified
      encodings will be penalized. If both non_penalized_features and non_penalized_encodings are provided, the
      union of the two will be used.

    Other supported arguments are inherited from LogitNet of python-glmnet package and will be directly passed to
    it. n_jobs will be overwritten to use the number of CPUs specified for the instruction (e.g. in TrainMLModel).

    **YAML specification:**

    .. code-block:: yaml

        ml_methods:
            custom_log_reg:
                LogRegressionCustomPenalty:
                    alpha: 1
                    n_lambda: 100
                    non_penalized_features: []
                    non_penalized_encodings: ['Metadata']
                    random_state: 42

    """

    def __init__(self, non_penalized_features: list = None, name: str = None, label: Label = None,
                 non_penalized_encodings: list = None, **kwargs):
        super().__init__(name=name, label=label)
        self.non_penalized_features = non_penalized_features if non_penalized_features is not None else []
        self.non_penalized_encodings = non_penalized_encodings if non_penalized_encodings is not None else []

        # Normalize encoding names: users may write either 'Metadata' or 'MetadataEncoder' in the YAML spec;
        # feature_annotations in _fit stores the full encoder class name, so append the suffix when missing.
        for ind, encoding in enumerate(self.non_penalized_encodings):
            if 'Encoder' not in encoding:
                self.non_penalized_encodings[ind] = encoding + 'Encoder'

        # Fitted LogitNet model and the feature names seen during fitting; set in _fit.
        self.model = None
        self.feature_names = None
        # Remaining keyword arguments are forwarded verbatim to LogitNet.
        self.kwargs = kwargs

    def _fit(self, encoded_data: EncodedData, cores_for_training: int = 2):
        """Fit a LogitNet model on the encoded examples, with penalty factor 0 for non-penalized features."""
        X = encoded_data.examples
        y = Util.map_to_new_class_values(encoded_data.labels[self.label.name], self.class_mapping)
        self.feature_names = encoded_data.feature_names

        # With a CompositeEncoder, expand non_penalized_encodings into concrete feature names and merge them
        # (union) into non_penalized_features.
        if encoded_data.encoding == 'CompositeEncoder' and self.non_penalized_encodings:
            features_from_non_penalized_encodings = encoded_data.feature_annotations[
                encoded_data.feature_annotations['encoder'].isin(self.non_penalized_encodings)]['feature'].tolist()
            non_penalized_features = list(set(features_from_non_penalized_encodings))
            non_penalized_features.extend(self.non_penalized_features)
            self.non_penalized_features = list(set(non_penalized_features))
            logging.info(f"{self.__class__.__name__}: inferred non-penalized features: {self.non_penalized_features}")

        # Create penalty factor vector: 1.0 = standard regularization, 0.0 = no penalty for that feature.
        penalty_factor = np.ones(X.shape[1])
        for idx, feature in enumerate(self.feature_names):
            if feature in self.non_penalized_features:
                penalty_factor[idx] = 0.0

        # n_jobs is always overwritten with the instruction-level CPU count.
        self.model = LogitNet(**self.kwargs, n_jobs=cores_for_training)
        self.model.fit(X, y, relative_penalties=penalty_factor)

    def _predict(self, encoded_data: EncodedData):
        """Predict class labels, mapped back to the original class values."""
        predictions = self.model.predict(encoded_data.examples)
        return {self.label.name: Util.map_to_old_class_values(np.array(predictions), self.class_mapping)}

    def _predict_proba(self, encoded_data: EncodedData):
        """Predict class probabilities as {label_name: {class_name: probability_column}}."""
        class_names = Util.map_to_old_class_values(self.model.classes_, self.class_mapping)
        probabilities = self.model.predict_proba(encoded_data.examples)
        return {self.label.name: {class_name: probabilities[:, i] for i, class_name in enumerate(class_names)}}