问题
I want to subclass `sklearn.svm.LinearSVC` and use it as an estimator for `sklearn.model_selection.GridSearchCV`. I had some issues with subclassing earlier, and I thought I had fixed them based on my previous post and the selected answer.
However, now my objective is to create an `sklearn.kernel_approximation.RBFSampler` object as an attribute of my new class. This is just an example; my broader question is: when the subclass is used as an estimator in `GridSearchCV`, how can I create attributes based on argument values passed into the constructor function (or lack thereof)? So far, I have attempted something like the below:
from sklearn.datasets import make_classification
from sklearn.svm import LinearSVC
from sklearn.model_selection import GridSearchCV
from sklearn.kernel_approximation import RBFSampler
from sklearn.datasets import load_breast_cancer
RANDOM_STATE = 123
class LinearSVCSub(LinearSVC):
    """LinearSVC subclass that tries to attach an optional RBFSampler built from constructor arguments.

    This is the version posted in the question. It is the code that produces the
    NameError traceback quoted after the listing, so it is kept exactly as written.
    """
    def __init__(self, penalty='l2', loss='squared_hinge', sampler_gamma=None, sampler_n=None,
                 dual=True, tol=0.0001, C=1.0, multi_class='ovr', fit_intercept=True, intercept_scaling=1,
                 class_weight=None, verbose=0, random_state=None, max_iter=1000):
        # Forward the LinearSVC parameters unchanged to the parent constructor.
        super(LinearSVCSub, self).__init__(penalty=penalty, loss=loss, dual=dual, tol=tol,
                                           C=C, multi_class=multi_class, fit_intercept=fit_intercept,
                                           intercept_scaling=intercept_scaling, class_weight=class_weight,
                                           verbose=verbose, random_state=random_state, max_iter=max_iter)
        # The two extra parameters that configure the optional RBFSampler.
        self.sampler_gamma = sampler_gamma
        self.sampler_n = sampler_n
        # I have also tried a conditional statement here instead of
        # within a separate function create_sampler()
        # NOTE(review): create_sampler is called as a bare name, but it is only
        # defined as a method of this class; this matches the reported NameError.
        self.sampler = create_sampler()

    def fit(self, X, y, sample_weight=None):
        """Pass X through transform_this, fit the parent LinearSVC on the result, and return self."""
        X = self.transform_this(X)
        super(LinearSVCSub, self).fit(X, y, sample_weight)
        return self

    def predict(self, X):
        """Pass X through transform_this and return the parent class's predictions."""
        X = self.transform_this(X)
        return super(LinearSVCSub, self).predict(X)

    def score(self, X, y, sample_weight=None):
        """Pass X through transform_this and return the parent class's score."""
        X = self.transform_this(X)
        return super(LinearSVCSub, self).score(X, y, sample_weight)

    def decision_function(self, X):
        """Pass X through transform_this and return the parent class's decision values."""
        X = self.transform_this(X)
        return super(LinearSVCSub, self).decision_function(X)

    def transform_this(self, X):
        """Return X mapped through the sampler when one is configured, otherwise X unchanged."""
        if self.sampler is not None:
            # NOTE(review): 'sampler' is not defined as a local name in this method;
            # presumably self.sampler was intended. fit_transform is also invoked on
            # every call, i.e. from fit, predict, score and decision_function alike.
            X = sampler.fit_transform(X)
        return X

    def create_sampler(self):
        """Return an RBFSampler when both sampler_gamma and sampler_n are set, otherwise None."""
        # If sampler_gamma and sampler_n have been given, create a sampler
        if (self.sampler_gamma is not None) and (self.sampler_n is not None):
            sampler = RBFSampler(gamma=self.sampler_gamma, n_components=self.sampler_n)
        else:
            sampler = None
        return sampler
if __name__ == '__main__':
    # Load the data set and split it into the feature matrix and the target vector.
    data = load_breast_cancer()
    X, y = data.data, data.target
    # Parameter tuning with custom LinearSVC
    param_grid = {'C': [0.00001, 0.0005],
                  'dual': (True, False), 'random_state': [RANDOM_STATE],
                  'sampler_gamma': [0.90, 0.60, 0.30],
                  'sampler_n': [10, 200]}
    # LinearSVCSub() is constructed here with all default arguments; this is the
    # call the quoted traceback points at.
    gs_model = GridSearchCV(estimator=LinearSVCSub(), verbose=1, param_grid=param_grid,
                            scoring='roc_auc', n_jobs=-1, cv=2)
    gs_model.fit(X, y)
    # Bare expression: presumably meant to display the results in a notebook cell.
    gs_model.cv_results_
However, as I have learnt here, `GridSearchCV` instantiates the estimator objects with the default values first and has a similar implementation to the `feature_importances_` attribute in `sklearn.tree.DecisionTreeClassifier`.
Also, the error that I get from the above code is:
---------------------------------------------------------------------------
NameError Traceback (most recent call last)
<ipython-input-6-a11420cc931e> in <module>
66 'sampler_n': [10, 200]}
67
---> 68 gs_model = GridSearchCV(estimator=LinearSVCSub(), verbose=1, param_grid=param_grid,
69 scoring='roc_auc', n_jobs=-1, cv=2)
70 gs_model.fit(X, y)
<ipython-input-6-a11420cc931e> in __init__(self, penalty, loss, sampler_gamma, sampler_n, dual, tol, C, multi_class, fit_intercept, intercept_scaling, class_weight, verbose, random_state, max_iter)
21 self.sampler_n = sampler_n
22
---> 23 self.sampler = create_sampler()
24
25
NameError: name 'create_sampler' is not defined
回答1:
- Use the `__init__` constructor only as a container to store the attributes.
- Do all the corresponding logic in methods.
from sklearn.datasets import make_classification
from sklearn.svm import LinearSVC
from sklearn.model_selection import GridSearchCV
from sklearn.kernel_approximation import RBFSampler
from sklearn.datasets import load_breast_cancer
RANDOM_STATE = 123
class LinearSVCSub(LinearSVC):
    """LinearSVC that can optionally map its inputs through an RBFSampler first.

    ``__init__`` only stores the hyper-parameters, so the estimator can be cloned
    and re-parameterised by ``GridSearchCV``. The sampler itself is created and
    fitted in ``fit`` and kept as the fitted attribute ``sampler_``; the same
    fitted sampler is then reused for every later call, so training and
    prediction see the same random feature map.

    ``predict`` and ``score`` are deliberately not overridden: the inherited
    implementations route through ``decision_function``, which applies the
    transformation exactly once. Transforming in those methods as well would
    apply the feature map more than once.

    Parameters
    ----------
    sampler_gamma : float or None
        ``gamma`` passed to the RBFSampler when sampling is enabled.
    sampler_n : int or None
        ``n_components`` passed to the RBFSampler when sampling is enabled.
    sampler : object, default=None
        Truthy value enables the RBFSampler step; falsy value disables it.

    All remaining parameters are forwarded unchanged to ``LinearSVC``.
    """

    def __init__(self, penalty='l2', loss='squared_hinge', sampler_gamma=None, sampler_n=None,
                 dual=True, tol=0.0001, C=1.0, multi_class='ovr', fit_intercept=True,
                 intercept_scaling=1, class_weight=None, verbose=0, random_state=None,
                 max_iter=1000, sampler=None):
        super().__init__(penalty=penalty, loss=loss, dual=dual, tol=tol,
                         C=C, multi_class=multi_class, fit_intercept=fit_intercept,
                         intercept_scaling=intercept_scaling, class_weight=class_weight,
                         verbose=verbose, random_state=random_state, max_iter=max_iter)
        # Store only; no derived objects are built here so that clone()/set_params() work.
        self.sampler_gamma = sampler_gamma
        self.sampler_n = sampler_n
        self.sampler = sampler

    def _make_sampler(self):
        """Return a new, unfitted RBFSampler, or None when sampling is disabled.

        Raises
        ------
        ValueError
            If sampling is enabled but ``sampler_gamma`` or ``sampler_n`` is None.
        """
        if not self.sampler:
            return None
        if self.sampler_gamma is None or self.sampler_n is None:
            raise ValueError(
                "sampler_gamma and sampler_n must both be set when sampler is enabled"
            )
        # Reuse the estimator's random_state so the random feature map is reproducible.
        return RBFSampler(gamma=self.sampler_gamma, n_components=self.sampler_n,
                          random_state=self.random_state)

    def fit(self, X, y, sample_weight=None):
        """Fit the optional sampler on X, then fit the linear SVM on the mapped data.

        Returns
        -------
        self
        """
        fitted_sampler = self._make_sampler()
        if fitted_sampler is not None:
            X = fitted_sampler.fit_transform(X)
        super().fit(X, y, sample_weight)
        # Assigned only after a successful fit so a failed fit leaves no stale state.
        self.sampler_ = fitted_sampler
        return self

    def decision_function(self, X):
        """Return decision values for X after applying the fitted feature map once."""
        return super().decision_function(self.transform_this(X))

    def transform_this(self, X):
        """Map X through the sampler fitted in ``fit``; return X unchanged if there is none."""
        fitted_sampler = getattr(self, 'sampler_', None)
        if fitted_sampler is not None:
            # transform (not fit_transform): the random features must match training.
            X = fitted_sampler.transform(X)
        return X
# Load the data set and pull out the feature matrix and the target vector.
data = load_breast_cancer()
X = data.data
y = data.target

# Parameter tuning with custom LinearSVC: every combination below is evaluated.
param_grid = {
    'C': [0.00001, 0.0005],
    'dual': (True, False),
    'random_state': [RANDOM_STATE],
    'sampler_gamma': [0.90, 0.60, 0.30],
    'sampler_n': [10, 200],
    'sampler': [0, 1],
}

# Two-fold cross-validated search scored by ROC AUC, using all available cores.
gs_model = GridSearchCV(
    estimator=LinearSVCSub(sampler=1),
    param_grid=param_grid,
    scoring='roc_auc',
    cv=2,
    n_jobs=-1,
    verbose=1,
)
gs_model.fit(X, y)
gs_model.cv_results_
来源:https://stackoverflow.com/questions/64532894/how-to-create-a-subclass-with-class-attributes-based-on-constructor-function-arg