from sklearn.model_selection import train_test_split
from pycaret.classification import setup
X_train, X_test = train_test_split(X, stratify=X[target_antib])
exp_cip = setup(X_train, target_antib, feature_selection=False)
The pandas DataFrames (both X and X_train) look fine to me. Any idea why this would crash?
---------------------------------------------------------------------------
AttributeError Traceback (most recent call last)
<ipython-input-30-0290d1b00c7f> in <module>
3 X_train, X_test = train_test_split(X, stratify=X[target_antib])
----> 5 exp_cip = setup(X_train, target_antib, feature_selection=False)
~/miniconda3/envs/py3/lib/python3.6/site-packages/pycaret/classification.py in setup(data, target, train_size, sampling, sample_estimator, categorical_features, categorical_imputation, ordinal_features, high_cardinality_features, high_cardinality_method, numeric_features, numeric_imputation, date_features, ignore_features, normalize, normalize_method, transformation, transformation_method, handle_unknown_categorical, unknown_categorical_method, pca, pca_method, pca_components, ignore_low_variance, combine_rare_levels, rare_level_threshold, bin_numeric_features, remove_outliers, outliers_threshold, remove_multicollinearity, multicollinearity_threshold, create_clusters, cluster_iter, polynomial_features, polynomial_degree, trigonometry_features, polynomial_threshold, group_features, group_names, feature_selection, feature_selection_threshold, feature_interaction, feature_ratio, interaction_threshold, session_id, silent, profile)
929 display_types = display_dtypes_pass, #this is for inferred input box
930 target_transformation = False, #not needed for classification
--> 931 random_state = seed)
933 progress.value += 1
~/miniconda3/envs/py3/lib/python3.6/site-packages/pycaret/preprocess.py in Preprocess_Path_One(train_data, target_variable, ml_usecase, test_data, categorical_features, numerical_features, time_features, features_todrop, display_types, imputation_type, numeric_imputation_strategy, categorical_imputation_strategy, apply_zero_nearZero_variance, club_rare_levels, rara_level_threshold_percentage, apply_untrained_levels_treatment, untrained_levels_treatment_method, apply_ordinal_encoding, ordinal_columns_and_categories, apply_cardinality_reduction, cardinal_method, cardinal_features, apply_binning, features_to_binn, apply_grouping, group_name, features_to_group_ListofList, apply_polynomial_trigonometry_features, max_polynomial, trigonometry_calculations, top_poly_trig_features_to_select_percentage, scale_data, scaling_method, Power_transform_data, Power_transform_method, target_transformation, target_transformation_method, remove_outliers, outlier_contamination_percentage, outlier_methods, apply_feature_selection, feature_selection_top_features_percentage, remove_multicollinearity, maximum_correlation_between_features, remove_perfect_collinearity, apply_feature_interactions, feature_interactions_to_apply, feature_interactions_top_features_to_select_percentage, cluster_entire_data, range_of_clusters_to_try, apply_pca, pca_method, pca_variance_retained_or_number_of_components, random_state)
2538 return(pipe.fit_transform(train_data),pipe.transform(test_data))
2539 else:
-> 2540 return(pipe.fit_transform(train_data))
~/miniconda3/envs/py3/lib/python3.6/site-packages/sklearn/pipeline.py in fit_transform(self, X, y, **fit_params)
383 """
384 last_step = self._final_estimator
--> 385 Xt, fit_params = self._fit(X, y, **fit_params)
386 with _print_elapsed_time('Pipeline',
387 self._log_message(len(self.steps) - 1)):
~/miniconda3/envs/py3/lib/python3.6/site-packages/sklearn/pipeline.py in _fit(self, X, y, **fit_params)
313 message_clsname='Pipeline',
314 message=self._log_message(step_idx),
--> 315 **fit_params_steps[name])
316 # Replace the transformer of the step with the fitted
317 # transformer. This is necessary when loading the transformer
~/miniconda3/envs/py3/lib/python3.6/site-packages/joblib/memory.py in __call__(self, *args, **kwargs)
351 def __call__(self, *args, **kwargs):
--> 352 return self.func(*args, **kwargs)
354 def call_and_shelve(self, *args, **kwargs):
~/miniconda3/envs/py3/lib/python3.6/site-packages/sklearn/pipeline.py in _fit_transform_one(transformer, X, y, weight, message_clsname, message, **fit_params)
726 with _print_elapsed_time(message_clsname, message):
727 if hasattr(transformer, 'fit_transform'):
--> 728 res = transformer.fit_transform(X, y, **fit_params)
729 else:
730 res = transformer.fit(X, y, **fit_params).transform(X)
~/miniconda3/envs/py3/lib/python3.6/site-packages/pycaret/preprocess.py in fit_transform(self, dataset, y)
328 data= dataset.copy()
329 # since this is for training , we dont nees any transformation since it has already been transformed in fit
--> 330 data = self.fit(data)
332 # additionally we just need to treat the target variable
~/miniconda3/envs/py3/lib/python3.6/site-packages/pycaret/preprocess.py in fit(self, dataset, y)
109 # if data type is bool , convert to categorical
110 for i in data.columns:
--> 111 if data[i].dtype=='bool':
112 data[i] = data[i].astype('object')
~/miniconda3/envs/py3/lib/python3.6/site-packages/pandas/core/generic.py in __getattr__(self, name)
5272 if self._info_axis._can_hold_identifiers_and_holds_name(name):
5273 return self[name]
-> 5274 return object.__getattribute__(self, name)
5276 def __setattr__(self, name: str, value) -> None:
AttributeError: 'DataFrame' object has no attribute 'dtype'
Just resolved! The dtype attribute only works on pandas Series objects. I figured out that a step in my feature engineering duplicates column names, so data[i] returns a DataFrame instead of a Series. Maybe you should raise an error in the setup function if data[i].shape[1] > 1.
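For reference, a minimal sketch of the behaviour (the df and "feat" names here are made up), plus a quick check one could run before calling setup:
import pandas as pd

# Duplicated column name "feat": selecting it returns a DataFrame, not a Series
df = pd.DataFrame([[1, True], [2, False]], columns=["feat", "feat"])
print(type(df["feat"]))               # <class 'pandas.core.frame.DataFrame'>, which has no .dtype
print(df.columns.duplicated().any())  # True -> duplicated column names, would break setup()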
Btw, thanks! Love your library <3