# Preprocess the training frame ``X`` and the test frame ``X_test``:
# impute missing values, then encode the categorical (object-dtype) columns.
# Every statistic is learned from ``X`` only and then applied to ``X_test``.

# Split columns by dtype, keeping the frame's original column order so the
# resulting lists are deterministic from run to run.
object_cols = [col for col in X.columns if X[col].dtype == "object"]
num_cols = [col for col in X.columns if col not in object_cols]

# Numeric imputation. ``numeric_only=True`` is required because X still holds
# string columns here; without it recent pandas raises TypeError. The test set
# is filled with the TRAINING means so both frames share the same statistics,
# matching how the categorical imputer below is fitted on X only.
train_means = X.mean(numeric_only=True)
X = X.fillna(train_means)
X_test = X_test.fillna(train_means)

# Categorical imputation with the most frequent training value.
# The imputer output carries no index/column labels, so re-attach the source
# frame's index and column names before assigning back; otherwise the
# assignment aligns on a fresh 0..n-1 index and can misplace rows or
# reintroduce NaN when the frame's index is not the default range.
object_imputer = SimpleImputer(strategy="most_frequent")
X[object_cols] = pd.DataFrame(
    object_imputer.fit_transform(X[object_cols]),
    index=X.index,
    columns=object_cols,
)
X_test[object_cols] = pd.DataFrame(
    object_imputer.transform(X_test[object_cols]),
    index=X_test.index,
    columns=object_cols,
)

# Explicit value -> integer mappings for the columns with known categories.
mapping = [
    {"col": "Dependents", "mapping": {"0": 0, "1": 1, "2": 2, "3+": 3}},
    {"col": "Education", "mapping": {"Not Graduate": 0, "Graduate": 1}},
]

onehot_cols = ["Gender", "Property_Area"]

# All object columns that are ordinal-encoded rather than one-hot encoded.
ord_cols = [col for col in object_cols if col not in onehot_cols]

# Remaining ordinal columns are treated as No/Yes flags. Columns that already
# have an explicit mapping above are skipped so they do not receive a second,
# conflicting No/Yes entry.
# NOTE(review): assumes every other object column holds only "No"/"Yes" --
# confirm against the dataset (an ID-like string column would not fit).
explicitly_mapped = {entry["col"] for entry in mapping}
for col in ord_cols:
    if col in explicitly_mapped:
        continue
    mapping.append({"col": col, "mapping": {"No": 0, "Yes": 1}})

# Ordinal-encode the mapped columns first, then one-hot encode the rest.
# NOTE(review): the ``mapping=`` / ``cols=`` keywords look like the
# category_encoders API rather than sklearn.preprocessing -- confirm imports.
encoder = Pipeline(steps=[
    ("ordinal", OrdinalEncoder(mapping=mapping)),
    ("onehot", OneHotEncoder(cols=onehot_cols)),
])

# Fit the encoders on the training data only; reuse them for the test data.
X = pd.DataFrame(encoder.fit_transform(X))
X_test = pd.DataFrame(encoder.transform(X_test))
var
This content, along with any associated source code and files, is licensed under The Code Project Open License (CPOL)