
I am getting this more or less randomly:

from sklearn.model_selection import train_test_split
from pycaret.classification import setup

X_train, X_test = train_test_split(X, stratify=X[target_antib])
exp_cip = setup(X_train, target_antib, feature_selection=False)

To me, the pandas.DataFrame (both X and X_train) looks fine. Any idea why this call would crash?
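By "looks fine" I mean checks roughly like these (an illustrative sketch only, reusing X_train and target_antib from the snippet above):

print(X_train.shape)                         # expected number of rows/columns
print(X_train.dtypes)                        # numeric / object / bool columns as expected
print(X_train[target_antib].value_counts())  # target classes are present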

---------------------------------------------------------------------------
AttributeError                            Traceback (most recent call last)
<ipython-input-30-0290d1b00c7f> in <module>
      3 X_train, X_test = train_test_split(X, stratify=X[target_antib])
----> 5 exp_cip = setup(X_train, target_antib, feature_selection=False)
~/miniconda3/envs/py3/lib/python3.6/site-packages/pycaret/classification.py in setup(data, target, train_size, sampling, sample_estimator, categorical_features, categorical_imputation, ordinal_features, high_cardinality_features, high_cardinality_method, numeric_features, numeric_imputation, date_features, ignore_features, normalize, normalize_method, transformation, transformation_method, handle_unknown_categorical, unknown_categorical_method, pca, pca_method, pca_components, ignore_low_variance, combine_rare_levels, rare_level_threshold, bin_numeric_features, remove_outliers, outliers_threshold, remove_multicollinearity, multicollinearity_threshold, create_clusters, cluster_iter, polynomial_features, polynomial_degree, trigonometry_features, polynomial_threshold, group_features, group_names, feature_selection, feature_selection_threshold, feature_interaction, feature_ratio, interaction_threshold, session_id, silent, profile)
    929                                           display_types = display_dtypes_pass, #this is for inferred input box
    930                                           target_transformation = False, #not needed for classification
--> 931                                           random_state = seed)
    933     progress.value += 1
~/miniconda3/envs/py3/lib/python3.6/site-packages/pycaret/preprocess.py in Preprocess_Path_One(train_data, target_variable, ml_usecase, test_data, categorical_features, numerical_features, time_features, features_todrop, display_types, imputation_type, numeric_imputation_strategy, categorical_imputation_strategy, apply_zero_nearZero_variance, club_rare_levels, rara_level_threshold_percentage, apply_untrained_levels_treatment, untrained_levels_treatment_method, apply_ordinal_encoding, ordinal_columns_and_categories, apply_cardinality_reduction, cardinal_method, cardinal_features, apply_binning, features_to_binn, apply_grouping, group_name, features_to_group_ListofList, apply_polynomial_trigonometry_features, max_polynomial, trigonometry_calculations, top_poly_trig_features_to_select_percentage, scale_data, scaling_method, Power_transform_data, Power_transform_method, target_transformation, target_transformation_method, remove_outliers, outlier_contamination_percentage, outlier_methods, apply_feature_selection, feature_selection_top_features_percentage, remove_multicollinearity, maximum_correlation_between_features, remove_perfect_collinearity, apply_feature_interactions, feature_interactions_to_apply, feature_interactions_top_features_to_select_percentage, cluster_entire_data, range_of_clusters_to_try, apply_pca, pca_method, pca_variance_retained_or_number_of_components, random_state)
   2538     return(pipe.fit_transform(train_data),pipe.transform(test_data))
   2539   else:
-> 2540     return(pipe.fit_transform(train_data))
~/miniconda3/envs/py3/lib/python3.6/site-packages/sklearn/pipeline.py in fit_transform(self, X, y, **fit_params)
    383         """
    384         last_step = self._final_estimator
--> 385         Xt, fit_params = self._fit(X, y, **fit_params)
    386         with _print_elapsed_time('Pipeline',
    387                                  self._log_message(len(self.steps) - 1)):
~/miniconda3/envs/py3/lib/python3.6/site-packages/sklearn/pipeline.py in _fit(self, X, y, **fit_params)
    313                 message_clsname='Pipeline',
    314                 message=self._log_message(step_idx),
--> 315                 **fit_params_steps[name])
    316             # Replace the transformer of the step with the fitted
    317             # transformer. This is necessary when loading the transformer
~/miniconda3/envs/py3/lib/python3.6/site-packages/joblib/memory.py in __call__(self, *args, **kwargs)
    351     def __call__(self, *args, **kwargs):
--> 352         return self.func(*args, **kwargs)
    354     def call_and_shelve(self, *args, **kwargs):
~/miniconda3/envs/py3/lib/python3.6/site-packages/sklearn/pipeline.py in _fit_transform_one(transformer, X, y, weight, message_clsname, message, **fit_params)
    726     with _print_elapsed_time(message_clsname, message):
    727         if hasattr(transformer, 'fit_transform'):
--> 728             res = transformer.fit_transform(X, y, **fit_params)
    729         else:
    730             res = transformer.fit(X, y, **fit_params).transform(X)
~/miniconda3/envs/py3/lib/python3.6/site-packages/pycaret/preprocess.py in fit_transform(self, dataset, y)
    328     data= dataset.copy()
    329     # since this is for training , we dont nees any transformation since it has already been transformed in fit
--> 330     data = self.fit(data)
    332     # additionally we just need to treat the target variable
~/miniconda3/envs/py3/lib/python3.6/site-packages/pycaret/preprocess.py in fit(self, dataset, y)
    109     # if data type is bool , convert to categorical
    110     for i in data.columns:
--> 111       if data[i].dtype=='bool':
    112         data[i] = data[i].astype('object')
~/miniconda3/envs/py3/lib/python3.6/site-packages/pandas/core/generic.py in __getattr__(self, name)
   5272             if self._info_axis._can_hold_identifiers_and_holds_name(name):
   5273                 return self[name]
-> 5274             return object.__getattribute__(self, name)
   5276     def __setattr__(self, name: str, value) -> None:
AttributeError: 'DataFrame' object has no attribute 'dtype'

Just resolved! The dtype attribute only works on pandas Series objects. It turned out that a step in my feature engineering duplicates column names, so data[i] returns a DataFrame instead of a Series. Maybe setup should raise an error when data[i].shape[1] > 1.
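A minimal sketch of what happens with duplicated column names, plus the kind of guard suggested above (the variable names and error message are just illustrative, not PyCaret's actual code):

import pandas as pd

# With duplicated column names, df[col] returns a DataFrame, not a Series,
# and DataFrames have no .dtype attribute -- hence the AttributeError above.
df = pd.DataFrame([[1, True], [2, False]], columns=["a", "a"])
print(type(df["a"]))   # <class 'pandas.core.frame.DataFrame'>
# df["a"].dtype        # AttributeError: 'DataFrame' object has no attribute 'dtype'

# Possible guard along the lines suggested above (illustrative only):
dupes = df.columns[df.columns.duplicated()].unique()
if len(dupes):
    raise ValueError(f"Duplicate column names passed to setup(): {list(dupes)}")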

Btw, thanks! Love your library <3