[BUG]: Failed to set the disabled "data_split_shuffle" option in classification. #3975

m-kaka · 2024-04-17T07:53:54Z

pycaret version checks

I have checked that this issue has not already been reported here.
I have confirmed this bug exists on the latest version of pycaret.
I have confirmed this bug exists on the master branch of pycaret (pip install -U git+https://github.com/pycaret/pycaret.git@master).

Issue Description

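For classification experiments, calling `setup()` with `data_split_shuffle=False` fails with a `ValueError`. As the traceback below shows, `setup()` still passes stratification columns to scikit-learn's `train_test_split` even though `data_split_stratify` was not set in the call, and `train_test_split` does not support a stratified split when `shuffle=False`.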

Reproducible Example

from pycaret.datasets import get_data
from pycaret.classification import *
data = get_data('iris')
s = setup(data, target = 'species', data_split_shuffle = False, session_id = 123)

Expected Behavior

`setup()` should complete without error when the `data_split_shuffle` option is disabled.

Actual Results

---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
Cell In[1], line 4
      2 from pycaret.classification import *
      3 data = get_data('iris')
----> 4 s = setup(data, target = 'species', data_split_shuffle = False, session_id = 123)

File /usr/local/lib/python3.10/site-packages/pycaret/classification/functional.py:595, in setup(data, data_func, target, index, train_size, test_data, ordinal_features, numeric_features, categorical_features, date_features, text_features, ignore_features, keep_features, preprocess, create_date_columns, imputation_type, numeric_imputation, categorical_imputation, iterative_imputation_iters, numeric_iterative_imputer, categorical_iterative_imputer, text_features_method, max_encoding_ohe, encoding_method, rare_to_value, rare_value, polynomial_features, polynomial_degree, low_variance_threshold, group_features, drop_groups, remove_multicollinearity, multicollinearity_threshold, bin_numeric_features, remove_outliers, outliers_method, outliers_threshold, fix_imbalance, fix_imbalance_method, transformation, transformation_method, normalize, normalize_method, pca, pca_method, pca_components, feature_selection, feature_selection_method, feature_selection_estimator, n_features_to_select, custom_pipeline, custom_pipeline_position, data_split_shuffle, data_split_stratify, fold_strategy, fold, fold_shuffle, fold_groups, n_jobs, use_gpu, html, session_id, system_log, log_experiment, experiment_name, experiment_custom_tags, log_plots, log_profile, log_data, verbose, memory, profile, profile_kwargs)
    593 exp = _EXPERIMENT_CLASS()
    594 set_current_experiment(exp)
--> 595 return exp.setup(
    596     data=data,
    597     data_func=data_func,
    598     target=target,
    599     index=index,
    600     train_size=train_size,
    601     test_data=test_data,
    602     ordinal_features=ordinal_features,
    603     numeric_features=numeric_features,
    604     categorical_features=categorical_features,
    605     date_features=date_features,
    606     text_features=text_features,
    607     ignore_features=ignore_features,
    608     keep_features=keep_features,
    609     preprocess=preprocess,
    610     create_date_columns=create_date_columns,
    611     imputation_type=imputation_type,
    612     numeric_imputation=numeric_imputation,
    613     categorical_imputation=categorical_imputation,
    614     iterative_imputation_iters=iterative_imputation_iters,
    615     numeric_iterative_imputer=numeric_iterative_imputer,
    616     categorical_iterative_imputer=categorical_iterative_imputer,
    617     text_features_method=text_features_method,
    618     max_encoding_ohe=max_encoding_ohe,
    619     encoding_method=encoding_method,
    620     rare_to_value=rare_to_value,
    621     rare_value=rare_value,
    622     polynomial_features=polynomial_features,
    623     polynomial_degree=polynomial_degree,
    624     low_variance_threshold=low_variance_threshold,
    625     group_features=group_features,
    626     drop_groups=drop_groups,
    627     remove_multicollinearity=remove_multicollinearity,
    628     multicollinearity_threshold=multicollinearity_threshold,
    629     bin_numeric_features=bin_numeric_features,
    630     remove_outliers=remove_outliers,
    631     outliers_method=outliers_method,
    632     outliers_threshold=outliers_threshold,
    633     fix_imbalance=fix_imbalance,
    634     fix_imbalance_method=fix_imbalance_method,
    635     transformation=transformation,
    636     transformation_method=transformation_method,
    637     normalize=normalize,
    638     normalize_method=normalize_method,
    639     pca=pca,
    640     pca_method=pca_method,
    641     pca_components=pca_components,
    642     feature_selection=feature_selection,
    643     feature_selection_method=feature_selection_method,
    644     feature_selection_estimator=feature_selection_estimator,
    645     n_features_to_select=n_features_to_select,
    646     custom_pipeline=custom_pipeline,
    647     custom_pipeline_position=custom_pipeline_position,
    648     data_split_shuffle=data_split_shuffle,
    649     data_split_stratify=data_split_stratify,
    650     fold_strategy=fold_strategy,
    651     fold=fold,
    652     fold_shuffle=fold_shuffle,
    653     fold_groups=fold_groups,
    654     n_jobs=n_jobs,
    655     use_gpu=use_gpu,
    656     html=html,
    657     session_id=session_id,
    658     system_log=system_log,
    659     log_experiment=log_experiment,
    660     experiment_name=experiment_name,
    661     experiment_custom_tags=experiment_custom_tags,
    662     log_plots=log_plots,
    663     log_profile=log_profile,
    664     log_data=log_data,
    665     verbose=verbose,
    666     memory=memory,
    667     profile=profile,
    668     profile_kwargs=profile_kwargs,
    669 )

File /usr/local/lib/python3.10/site-packages/pycaret/classification/oop.py:758, in ClassificationExperiment.setup(self, data, data_func, target, index, train_size, test_data, ordinal_features, numeric_features, categorical_features, date_features, text_features, ignore_features, keep_features, preprocess, create_date_columns, imputation_type, numeric_imputation, categorical_imputation, iterative_imputation_iters, numeric_iterative_imputer, categorical_iterative_imputer, text_features_method, max_encoding_ohe, encoding_method, rare_to_value, rare_value, polynomial_features, polynomial_degree, low_variance_threshold, group_features, drop_groups, remove_multicollinearity, multicollinearity_threshold, bin_numeric_features, remove_outliers, outliers_method, outliers_threshold, fix_imbalance, fix_imbalance_method, transformation, transformation_method, normalize, normalize_method, pca, pca_method, pca_components, feature_selection, feature_selection_method, feature_selection_estimator, n_features_to_select, custom_pipeline, custom_pipeline_position, data_split_shuffle, data_split_stratify, fold_strategy, fold, fold_shuffle, fold_groups, n_jobs, use_gpu, html, session_id, system_log, log_experiment, experiment_name, experiment_custom_tags, log_plots, log_profile, log_data, engine, verbose, memory, profile, profile_kwargs)
    748 self.data_split_shuffle = data_split_shuffle
    750 self._prepare_folds(
    751     fold_strategy=fold_strategy,
    752     fold=fold,
   (...)
    755     data_split_shuffle=data_split_shuffle,
    756 )
--> 758 self._prepare_train_test(
    759     train_size=train_size,
    760     test_data=test_data,
    761     data_split_stratify=data_split_stratify,
    762     data_split_shuffle=data_split_shuffle,
    763 )
    765 self._prepare_column_types(
    766     ordinal_features=ordinal_features,
    767     numeric_features=numeric_features,
   (...)
    772     keep_features=keep_features,
    773 )
    775 self._set_exp_model_engines(
    776     container_default_engines=get_container_default_engines(),
    777     engine=engine,
    778 )

File /usr/local/lib/python3.10/site-packages/pycaret/internal/preprocess/preprocessor.py:236, in Preprocessor._prepare_train_test(self, train_size, test_data, data_split_stratify, data_split_shuffle)
    233     self.data.index = self.index
    235 # self.data is already prepared here
--> 236 train, test = train_test_split(
    237     self.data,
    238     train_size=train_size,
    239     stratify=get_columns_to_stratify_by(
    240         self.X, self.y, data_split_stratify
    241     ),
    242     random_state=self.seed,
    243     shuffle=data_split_shuffle,
    244 )
    245 self.data = self._set_index(pd.concat([train, test]))
    246 self.idx = [self.data.index[: len(train)], self.data.index[-len(test) :]]

File /usr/local/lib/python3.10/site-packages/sklearn/utils/_param_validation.py:213, in validate_params.<locals>.decorator.<locals>.wrapper(*args, **kwargs)
    207 try:
    208     with config_context(
    209         skip_parameter_validation=(
    210             prefer_skip_nested_validation or global_skip_validation
    211         )
    212     ):
--> 213         return func(*args, **kwargs)
    214 except InvalidParameterError as e:
    215     # When the function is just a wrapper around an estimator, we allow
    216     # the function to delegate validation to the estimator, but we replace
    217     # the name of the estimator by the name of the function in the error
    218     # message to avoid confusion.
    219     msg = re.sub(
    220         r"parameter of \w+ must be",
    221         f"parameter of {func.__qualname__} must be",
    222         str(e),
    223     )

File /usr/local/lib/python3.10/site-packages/sklearn/model_selection/_split.py:2666, in train_test_split(test_size, train_size, random_state, shuffle, stratify, *arrays)
   2664 if shuffle is False:
   2665     if stratify is not None:
-> 2666         raise ValueError(
   2667             "Stratified train/test split is not implemented for shuffle=False"
   2668         )
   2670     train = np.arange(n_train)
   2671     test = np.arange(n_train, n_train + n_test)

ValueError: Stratified train/test split is not implemented for shuffle=False

Installed Versions

'3.3.1'

m-kaka added the bug Something isn't working label Apr 17, 2024

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

[BUG]: Failed to set the disabled "data_split_shuffle" option in classification. #3975

[BUG]: Failed to set the disabled "data_split_shuffle" option in classification. #3975

m-kaka commented Apr 17, 2024

[BUG]: Failed to set the disabled "data_split_shuffle" option in classification. #3975

[BUG]: Failed to set the disabled "data_split_shuffle" option in classification. #3975

Comments

m-kaka commented Apr 17, 2024

pycaret version checks

Issue Description

Reproducible Example

Expected Behavior

Actual Results

Installed Versions