问题
I've been attempting to fit a grid-searched k-nearest-neighbors model (a KNeighborsRegressor wrapped in GridSearchCV), but I am receiving the following error message:
TypeError : '<' not supported between instances of 'str' and 'int'
X_train
compact sa area roofM3 h o glaz glazing_area_distribution
0 0.66 759.5 318.5 220.50 3.5 2 0.40 3
1 0.76 661.5 416.5 122.50 7.0 3 0.10 1
2 0.66 759.5 318.5 220.50 3.5 3 0.10 1
3 0.74 686.0 245.0 220.50 3.5 5 0.10 4
4 0.64 784.0 343.0 220.50 3.5 2 0.40 4
... ... ... ... ... ... ... ... ...
609 0.98 514.5 294.0 110.25 7.0 4 0.40 2
X_train.describe()
count 614.000000 614.000000 614.000000 614.000000 614.000000 614.000000 614.000000 614.000000
mean 0.762606 673.271173 319.617264 176.826954 5.227199 3.495114 0.236645 2.802932
std 0.106725 88.757699 43.705256 45.499990 1.751278 1.124751 0.133044 1.571128
min 0.620000 514.500000 245.000000 110.250000 3.500000 2.000000 0.000000 0.000000
25% 0.660000 612.500000 294.000000 122.500000 3.500000 2.000000 0.100000 1.000000
75% 0.820000 759.500000 343.000000 220.500000 7.000000 4.000000 0.400000 4.000000
max 0.980000 808.500000 416.500000 220.500000 7.000000 5.000000 0.400000 5.000000
y_train
0 15.16
1 32.12
2 11.69
3 10.14
4 19.06
...
609 32.24
This is my attempt to create and fit the model:
# Reproduction of the failing grid search over a k-nearest-neighbours regressor.
from sklearn.model_selection import KFold
# NOTE(review): this KFold instance is never used; `model` is rebound further down.
model = KFold()
# 5-fold cross-validation with shuffling and a fixed random_state.
cv_object = KFold(n_splits=5, shuffle=True, random_state=50)
# NOTE(review): the n_neighbors candidates are strings ('1'..'5'), not integers.
# The traceback later in this post fails on
# `self.n_neighbors < self._fit_X.shape[0] // 2` with
# "'<' not supported between instances of 'str' and 'int'", which matches these
# string values -- integers (1, 2, 3, 4, 5) are presumably what was intended.
grid_values = {'n_neighbors': ['1','2','3','4','5'],
'weights': ['uniform', 'distance']
}
from sklearn.model_selection import GridSearchCV
# NOTE(review): KNeighborsRegressor is not imported anywhere in the visible
# snippet, and this `model` is not the instance handed to GridSearchCV below.
model = KNeighborsRegressor()
# Grid search over grid_values using cv_object for the splits and
# 'neg_mean_absolute_error' as the scoring string.
grid_estimator = GridSearchCV(KNeighborsRegressor(), cv=cv_object,
param_grid=grid_values,
scoring='neg_mean_absolute_error')
grid_estimator.fit(X_train, y_train)  # <-- this is the call that fails
I have attempted to transform my data with LabelEncoder and OneHotEncoder, and I have also tried dropping the discrete features, but I still receive errors. The OneHotEncoder attempt fails with a different TypeError:
TypeError Traceback (most recent call last)
<ipython-input-143-3054acff691c> in <module>
19 ohe = OneHotEncoder (df.iloc [-4, -2],
20 sparse = False, handle_unknown = "ignore")
---> 21 df_processed_np = ohe . fit_transform ( df )
~ \ anaconda3 \ lib \ site-packages \ sklearn \ preprocessing \ _encoders.py in fit_transform (self, X, y)
408 "" "
409 self . _validate_keywords ( )
-> 410 return super ( ) . fit_transform ( X , y ) 411 412 def transform ( self , X ) :
~ \ anaconda3 \ lib \ site-packages \ sklearn \ base.py in fit_transform (self, X, y, ** fit_params)
688 if y is None :
689 # fit method of arity 1 (unsupervised transformation)
-> 690 return self . fit ( X , ** fit_params ) . transform ( X ) 691 else : 692 # fit method of arity 2 (supervised transformation)
~ \ anaconda3 \ lib \ site-packages \ sklearn \ preprocessing \ _encoders.py in fit (self, X, y)
383 "" "
384 self . _validate_keywords ( )
-> 385 self . _fit ( X , handle_unknown = self . handle_unknown )
386 self . drop_idx_ = self . _compute_drop_idx ( )
387 return self
~ \ anaconda3 \ lib \ site-packages \ sklearn \ preprocessing \ _encoders.py in _fit (self, X, handle_unknown)
75
76 if self . categories ! = 'auto' :
---> 77 if len ( self . categories ) ! = n_features : 78 raise ValueError ("Shape mismatch: if categories is an array,"
79 "it has to be of shape (n_features,) . ")
TypeError : object of type 'numpy.int64' has no len ()
This is my attempt to use LabelEncoder and OneHotEncoder:
# Attempt to label-encode two categorical columns and then one-hot encode the frame.
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
# NOTE(review): this is a two-column DataFrame rather than a list of names;
# iterating over it below yields the column labels.
cat_columns = df[['orientation', 'glazing_area_distribution']]
# Holds the fitted LabelEncoder for each column, keyed by column name.
label_encoders = {}
# NOTE(review): the loop body's indentation was lost in this paste; the
# statements that follow (at least through `label_encoders[col] = new_le`)
# presumably belong inside the loop -- confirm against the original notebook.
for col in cat_columns:
print("Encoding {}".format(col))
new_le = LabelEncoder()
# Overwrites the column in df with its fitted-and-transformed values.
df[col] = new_le.fit_transform(df[col])
label_encoders[col] = new_le
# NOTE(review): the result of this expression is not assigned to anything.
df.index.to_frame().reset_index(drop=True)
#cat_columns_idx = [df.get_loc(col)
# for col in cat_columns]
# NOTE(review): the first positional argument here is a single cell value
# (df.iloc[-4,-2]). The traceback in this post fails at
# `len ( self . categories )` with "object of type 'numpy.int64' has no len ()",
# which suggests that scalar is being taken as the `categories` parameter.
ohe = OneHotEncoder(df.iloc[-4,-2],
sparse=False, handle_unknown="ignore")
# Fits the encoder on the whole of df (every column, not only the two above).
df_processed_np = ohe.fit_transform(df)
This is the error I get after the categorical/discrete columns were dropped:
warnings.warn ("Estimator fit failed. The score on this train-test"
C: \ Users \ SamWinter \ anaconda3 \ lib \ site-packages \ sklearn \ model_selection \ _validation.py: 548: FitFailedWarning: Estimator fit failed. The score on this train-test partition for these parameters will be set to nan. Details:
Traceback (most recent call last):
File "C: \ Users \ SamWinter \ anaconda3 \ lib \ site-packages \ sklearn \ model_selection \ _validation.py", line 531, in _fit_and_score
estimator.fit (X_train, y_train, ** fit_params)
File "C: \ Users \ SamWinter \ anaconda3 \ lib \ site-packages \ sklearn \ neighbors \ _base.py", line 1110, in fit
return self._fit (X)
File "C: \ Users \ SamWinter \ anaconda3 \ lib \ site-packages \ sklearn \ neighbors \ _base.py", line 439, in _fit
self.n_neighbors <self._fit_X.shape [0] // 2) and
TypeError: '<' not supported between instances of 'str' and 'int'
warnings.warn ("Estimator fit failed. The score on this train-test"
-------------------------------------------------- -------------------------
TypeError Traceback (most recent call last)
<ipython-input-168-5fa0b664280a> in <module>
18 #random_search = RandomizedSearchCV (k_model, param_distributions = param_grid,
19 # n_iter = 10, cv = 5, scoring = 'accuracy')
---> 20 gridsearch . fit ( X_train2 , y_train )
~ \ anaconda3 \ lib \ site-packages \ sklearn \ utils \ validation.py in inner_f (* args, ** kwargs)
70 FutureWarning)
71 kwargs . update ( { k : arg for k , arg in zip ( sig . parameters , args ) } )
---> 72 return f ( ** kwargs ) 73 return inner_f
74
~ \ anaconda3 \ lib \ site-packages \ sklearn \ model_selection \ _search.py in fit (self, X, y, groups, ** fit_params)
763 refit_start_time = time . time ( )
764 if y is not None :
-> 765 self . best_estimator_ . fit ( X , y , ** fit_params )
766 else :
767 self . best_estimator_ . fit (X , ** fit_params )
~ \ anaconda3 \ lib \ site-packages \ sklearn \ neighbors \ _base.py in fit (self, X, y)
1108 multi_output = True)
1109 self . _y = y
-> 1110 return self . _fit ( X ) 1111 1112 def _more_tags ( self ) :
~ \ anaconda3 \ lib \ site-packages \ sklearn \ neighbors \ _base.py in _fit (self, X)
437 # and KDTree is generally faster when available
438 if ((self.n_neighbors is None or
-> 439 self.n_neighbors <self._fit_X.shape [0] // 2) and
440 self.metric! = 'precomputed'):
441 if self . effective_metric_ in VALID_METRICS [ 'kd_tree' ] :
TypeError : '<' not supported between instances of 'str' and 'int'
来源:https://stackoverflow.com/questions/66070889/attempting-to-fit-a-grid-estimator-recieving-typeerror-not-supported-betw