Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[BUG]: setup() fails when the "data_split_shuffle" option is disabled in classification. #3975

Open
3 tasks done
m-kaka opened this issue Apr 17, 2024 · 0 comments
Open
3 tasks done
Labels
bug Something isn't working

Comments

@m-kaka
Copy link

m-kaka commented Apr 17, 2024

pycaret version checks

Issue Description

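For a classification experiment, setup() fails when the "data_split_shuffle" option is disabled (data_split_shuffle = False). As the traceback below shows, a stratified split is still requested even though data_split_stratify was not passed explicitly, and scikit-learn's train_test_split raises a ValueError because a stratified split is not implemented for shuffle=False.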

Reproducible Example

from pycaret.datasets import get_data
from pycaret.classification import *
data = get_data('iris')
s = setup(data, target = 'species', data_split_shuffle = False, session_id = 123)

Expected Behavior

setup() should complete successfully when the data_split_shuffle option is disabled (data_split_shuffle = False).

Actual Results

---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
Cell In[1], line 4
      2 from pycaret.classification import *
      3 data = get_data('iris')
----> 4 s = setup(data, target = 'species', data_split_shuffle = False, session_id = 123)

File /usr/local/lib/python3.10/site-packages/pycaret/classification/functional.py:595, in setup(data, data_func, target, index, train_size, test_data, ordinal_features, numeric_features, categorical_features, date_features, text_features, ignore_features, keep_features, preprocess, create_date_columns, imputation_type, numeric_imputation, categorical_imputation, iterative_imputation_iters, numeric_iterative_imputer, categorical_iterative_imputer, text_features_method, max_encoding_ohe, encoding_method, rare_to_value, rare_value, polynomial_features, polynomial_degree, low_variance_threshold, group_features, drop_groups, remove_multicollinearity, multicollinearity_threshold, bin_numeric_features, remove_outliers, outliers_method, outliers_threshold, fix_imbalance, fix_imbalance_method, transformation, transformation_method, normalize, normalize_method, pca, pca_method, pca_components, feature_selection, feature_selection_method, feature_selection_estimator, n_features_to_select, custom_pipeline, custom_pipeline_position, data_split_shuffle, data_split_stratify, fold_strategy, fold, fold_shuffle, fold_groups, n_jobs, use_gpu, html, session_id, system_log, log_experiment, experiment_name, experiment_custom_tags, log_plots, log_profile, log_data, verbose, memory, profile, profile_kwargs)
    593 exp = _EXPERIMENT_CLASS()
    594 set_current_experiment(exp)
--> 595 return exp.setup(
    596     data=data,
    597     data_func=data_func,
    598     target=target,
    599     index=index,
    600     train_size=train_size,
    601     test_data=test_data,
    602     ordinal_features=ordinal_features,
    603     numeric_features=numeric_features,
    604     categorical_features=categorical_features,
    605     date_features=date_features,
    606     text_features=text_features,
    607     ignore_features=ignore_features,
    608     keep_features=keep_features,
    609     preprocess=preprocess,
    610     create_date_columns=create_date_columns,
    611     imputation_type=imputation_type,
    612     numeric_imputation=numeric_imputation,
    613     categorical_imputation=categorical_imputation,
    614     iterative_imputation_iters=iterative_imputation_iters,
    615     numeric_iterative_imputer=numeric_iterative_imputer,
    616     categorical_iterative_imputer=categorical_iterative_imputer,
    617     text_features_method=text_features_method,
    618     max_encoding_ohe=max_encoding_ohe,
    619     encoding_method=encoding_method,
    620     rare_to_value=rare_to_value,
    621     rare_value=rare_value,
    622     polynomial_features=polynomial_features,
    623     polynomial_degree=polynomial_degree,
    624     low_variance_threshold=low_variance_threshold,
    625     group_features=group_features,
    626     drop_groups=drop_groups,
    627     remove_multicollinearity=remove_multicollinearity,
    628     multicollinearity_threshold=multicollinearity_threshold,
    629     bin_numeric_features=bin_numeric_features,
    630     remove_outliers=remove_outliers,
    631     outliers_method=outliers_method,
    632     outliers_threshold=outliers_threshold,
    633     fix_imbalance=fix_imbalance,
    634     fix_imbalance_method=fix_imbalance_method,
    635     transformation=transformation,
    636     transformation_method=transformation_method,
    637     normalize=normalize,
    638     normalize_method=normalize_method,
    639     pca=pca,
    640     pca_method=pca_method,
    641     pca_components=pca_components,
    642     feature_selection=feature_selection,
    643     feature_selection_method=feature_selection_method,
    644     feature_selection_estimator=feature_selection_estimator,
    645     n_features_to_select=n_features_to_select,
    646     custom_pipeline=custom_pipeline,
    647     custom_pipeline_position=custom_pipeline_position,
    648     data_split_shuffle=data_split_shuffle,
    649     data_split_stratify=data_split_stratify,
    650     fold_strategy=fold_strategy,
    651     fold=fold,
    652     fold_shuffle=fold_shuffle,
    653     fold_groups=fold_groups,
    654     n_jobs=n_jobs,
    655     use_gpu=use_gpu,
    656     html=html,
    657     session_id=session_id,
    658     system_log=system_log,
    659     log_experiment=log_experiment,
    660     experiment_name=experiment_name,
    661     experiment_custom_tags=experiment_custom_tags,
    662     log_plots=log_plots,
    663     log_profile=log_profile,
    664     log_data=log_data,
    665     verbose=verbose,
    666     memory=memory,
    667     profile=profile,
    668     profile_kwargs=profile_kwargs,
    669 )

File /usr/local/lib/python3.10/site-packages/pycaret/classification/oop.py:758, in ClassificationExperiment.setup(self, data, data_func, target, index, train_size, test_data, ordinal_features, numeric_features, categorical_features, date_features, text_features, ignore_features, keep_features, preprocess, create_date_columns, imputation_type, numeric_imputation, categorical_imputation, iterative_imputation_iters, numeric_iterative_imputer, categorical_iterative_imputer, text_features_method, max_encoding_ohe, encoding_method, rare_to_value, rare_value, polynomial_features, polynomial_degree, low_variance_threshold, group_features, drop_groups, remove_multicollinearity, multicollinearity_threshold, bin_numeric_features, remove_outliers, outliers_method, outliers_threshold, fix_imbalance, fix_imbalance_method, transformation, transformation_method, normalize, normalize_method, pca, pca_method, pca_components, feature_selection, feature_selection_method, feature_selection_estimator, n_features_to_select, custom_pipeline, custom_pipeline_position, data_split_shuffle, data_split_stratify, fold_strategy, fold, fold_shuffle, fold_groups, n_jobs, use_gpu, html, session_id, system_log, log_experiment, experiment_name, experiment_custom_tags, log_plots, log_profile, log_data, engine, verbose, memory, profile, profile_kwargs)
    748 self.data_split_shuffle = data_split_shuffle
    750 self._prepare_folds(
    751     fold_strategy=fold_strategy,
    752     fold=fold,
   (...)
    755     data_split_shuffle=data_split_shuffle,
    756 )
--> 758 self._prepare_train_test(
    759     train_size=train_size,
    760     test_data=test_data,
    761     data_split_stratify=data_split_stratify,
    762     data_split_shuffle=data_split_shuffle,
    763 )
    765 self._prepare_column_types(
    766     ordinal_features=ordinal_features,
    767     numeric_features=numeric_features,
   (...)
    772     keep_features=keep_features,
    773 )
    775 self._set_exp_model_engines(
    776     container_default_engines=get_container_default_engines(),
    777     engine=engine,
    778 )

File /usr/local/lib/python3.10/site-packages/pycaret/internal/preprocess/preprocessor.py:236, in Preprocessor._prepare_train_test(self, train_size, test_data, data_split_stratify, data_split_shuffle)
    233     self.data.index = self.index
    235 # self.data is already prepared here
--> 236 train, test = train_test_split(
    237     self.data,
    238     train_size=train_size,
    239     stratify=get_columns_to_stratify_by(
    240         self.X, self.y, data_split_stratify
    241     ),
    242     random_state=self.seed,
    243     shuffle=data_split_shuffle,
    244 )
    245 self.data = self._set_index(pd.concat([train, test]))
    246 self.idx = [self.data.index[: len(train)], self.data.index[-len(test) :]]

File /usr/local/lib/python3.10/site-packages/sklearn/utils/_param_validation.py:213, in validate_params.<locals>.decorator.<locals>.wrapper(*args, **kwargs)
    207 try:
    208     with config_context(
    209         skip_parameter_validation=(
    210             prefer_skip_nested_validation or global_skip_validation
    211         )
    212     ):
--> 213         return func(*args, **kwargs)
    214 except InvalidParameterError as e:
    215     # When the function is just a wrapper around an estimator, we allow
    216     # the function to delegate validation to the estimator, but we replace
    217     # the name of the estimator by the name of the function in the error
    218     # message to avoid confusion.
    219     msg = re.sub(
    220         r"parameter of \w+ must be",
    221         f"parameter of {func.__qualname__} must be",
    222         str(e),
    223     )

File /usr/local/lib/python3.10/site-packages/sklearn/model_selection/_split.py:2666, in train_test_split(test_size, train_size, random_state, shuffle, stratify, *arrays)
   2664 if shuffle is False:
   2665     if stratify is not None:
-> 2666         raise ValueError(
   2667             "Stratified train/test split is not implemented for shuffle=False"
   2668         )
   2670     train = np.arange(n_train)
   2671     test = np.arange(n_train, n_train + n_test)

ValueError: Stratified train/test split is not implemented for shuffle=False

Installed Versions

'3.3.1'

@m-kaka m-kaka added the bug Something isn't working label Apr 17, 2024
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
bug Something isn't working
Projects
None yet
Development

No branches or pull requests

1 participant