import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
from sklearn.linear_model import LassoCV
import mlflow
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler, OneHotEncoder
from sklearn.feature_selection import SelectKBest, chi2
from sklearn.base import TransformerMixin, BaseEstimator
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.model_selection import GridSearchCV
import pickle
from sklearn.metrics import RocCurveDisplay, roc_curve
# Point both the low-level client and the fluent API at the local tracking
# server, then select the experiment that subsequent logging calls will use.
mlflow_client = mlflow.client.MlflowClient(tracking_uri="http://127.0.0.1:5000")
mlflow.set_tracking_uri(uri="http://127.0.0.1:5000")
mlflow.set_experiment(experiment_name="house-sales-2.0")
<Experiment: artifact_location='./mlruns/4', creation_time=1664730900366, experiment_id='4', last_update_time=1664730900366, lifecycle_stage='active', name='house-sales-2.0', tags={}>
# Load the training data and print its column dtypes and non-null counts.
df = pd.read_csv("train.csv")
df.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 1460 entries, 0 to 1459 Data columns (total 81 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 Id 1460 non-null int64 1 MSSubClass 1460 non-null int64 2 MSZoning 1460 non-null object 3 LotFrontage 1201 non-null float64 4 LotArea 1460 non-null int64 5 Street 1460 non-null object 6 Alley 91 non-null object 7 LotShape 1460 non-null object 8 LandContour 1460 non-null object 9 Utilities 1460 non-null object 10 LotConfig 1460 non-null object 11 LandSlope 1460 non-null object 12 Neighborhood 1460 non-null object 13 Condition1 1460 non-null object 14 Condition2 1460 non-null object 15 BldgType 1460 non-null object 16 HouseStyle 1460 non-null object 17 OverallQual 1460 non-null int64 18 OverallCond 1460 non-null int64 19 YearBuilt 1460 non-null int64 20 YearRemodAdd 1460 non-null int64 21 RoofStyle 1460 non-null object 22 RoofMatl 1460 non-null object 23 Exterior1st 1460 non-null object 24 Exterior2nd 1460 non-null object 25 MasVnrType 1452 non-null object 26 MasVnrArea 1452 non-null float64 27 ExterQual 1460 non-null object 28 ExterCond 1460 non-null object 29 Foundation 1460 non-null object 30 BsmtQual 1423 non-null object 31 BsmtCond 1423 non-null object 32 BsmtExposure 1422 non-null object 33 BsmtFinType1 1423 non-null object 34 BsmtFinSF1 1460 non-null int64 35 BsmtFinType2 1422 non-null object 36 BsmtFinSF2 1460 non-null int64 37 BsmtUnfSF 1460 non-null int64 38 TotalBsmtSF 1460 non-null int64 39 Heating 1460 non-null object 40 HeatingQC 1460 non-null object 41 CentralAir 1460 non-null object 42 Electrical 1459 non-null object 43 1stFlrSF 1460 non-null int64 44 2ndFlrSF 1460 non-null int64 45 LowQualFinSF 1460 non-null int64 46 GrLivArea 1460 non-null int64 47 BsmtFullBath 1460 non-null int64 48 BsmtHalfBath 1460 non-null int64 49 FullBath 1460 non-null int64 50 HalfBath 1460 non-null int64 51 BedroomAbvGr 1460 non-null int64 52 KitchenAbvGr 1460 non-null int64 53 KitchenQual 1460 non-null 
object 54 TotRmsAbvGrd 1460 non-null int64 55 Functional 1460 non-null object 56 Fireplaces 1460 non-null int64 57 FireplaceQu 770 non-null object 58 GarageType 1379 non-null object 59 GarageYrBlt 1379 non-null float64 60 GarageFinish 1379 non-null object 61 GarageCars 1460 non-null int64 62 GarageArea 1460 non-null int64 63 GarageQual 1379 non-null object 64 GarageCond 1379 non-null object 65 PavedDrive 1460 non-null object 66 WoodDeckSF 1460 non-null int64 67 OpenPorchSF 1460 non-null int64 68 EnclosedPorch 1460 non-null int64 69 3SsnPorch 1460 non-null int64 70 ScreenPorch 1460 non-null int64 71 PoolArea 1460 non-null int64 72 PoolQC 7 non-null object 73 Fence 281 non-null object 74 MiscFeature 54 non-null object 75 MiscVal 1460 non-null int64 76 MoSold 1460 non-null int64 77 YrSold 1460 non-null int64 78 SaleType 1460 non-null object 79 SaleCondition 1460 non-null object 80 SalePrice 1460 non-null int64 dtypes: float64(3), int64(35), object(43) memory usage: 924.0+ KB
Existen varios campos con datos perdidos, aunque al explorar la descripción de los datos vemos que algunos de esos valores perdidos tienen significado: por ejemplo, en PoolQC el valor NA significa que el inmueble no tiene piscina. Por este motivo, los campos de este tipo se recodificarán con el valor por defecto 'no-item' en el momento de las imputaciones. En los demás casos imputaremos la moda en los campos categóricos y la mediana en los campos numéricos.
# A fresh random seed is drawn on every execution; the same value is reused
# for the split here and for the model's random_state further down.
seed = np.random.randint(1, 10000)

# Separate the target from the features (pop removes SalePrice from df) and
# drop the row identifier from the feature matrix.
y = df.pop('SalePrice')
X = df.drop(columns="Id")

# Hold out 20% of the rows for the final evaluation.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=seed,
)
# Columns whose missing values are filled with the most frequent value
# (used by CustomImputer under the 'modas' key).
modas = ['MSSubClass', 'MSZoning', 'Street', 'LotShape', 'LandContour', 'Utilities', 'LotConfig', 'LandSlope',
'Neighborhood', 'Condition1', 'Condition2', 'BldgType', 'HouseStyle', 'RoofStyle', 'RoofMatl', 'Exterior1st',
'Exterior2nd', 'MasVnrType', 'ExterQual', 'ExterCond', 'Foundation', 'Heating', 'HeatingQC', 'CentralAir',
'Electrical', 'KitchenQual', 'Functional', 'PavedDrive', 'MoSold', 'SaleType', 'SaleCondition']
# Columns whose missing values are filled with the column median
# (used by CustomImputer under the 'medianas' key).
medianas = ['LotFrontage', 'LotArea', 'OverallQual', 'OverallCond', 'YearBuilt', 'YearRemodAdd', 'MasVnrArea',
'BsmtFinSF1', 'BsmtFinSF2', 'BsmtUnfSF', 'TotalBsmtSF', '1stFlrSF', '2ndFlrSF', 'LowQualFinSF',
'GrLivArea', 'BsmtFullBath', 'BsmtHalfBath', 'FullBath', 'HalfBath', 'BedroomAbvGr', 'KitchenAbvGr',
'TotRmsAbvGrd', 'Fireplaces', 'GarageYrBlt', 'GarageCars', 'GarageArea', 'WoodDeckSF', 'OpenPorchSF',
'EnclosedPorch', '3SsnPorch', 'ScreenPorch', 'PoolArea', 'MiscVal', 'YrSold']
# Columns whose missing values are filled with the constant 'no-item'
# (used by CustomImputer under the 'constantes' key).
constantes = ['Alley', 'BsmtQual', 'BsmtCond', 'BsmtExposure', 'BsmtFinType1', 'BsmtFinType2', 'FireplaceQu',
'GarageType', 'GarageFinish', 'GarageQual', 'GarageCond', 'PoolQC', 'Fence', 'MiscFeature']
class CustomImputer(BaseEstimator, TransformerMixin):
    """Impute missing values per column group using statistics learned in ``fit``.

    Parameters
    ----------
    fields : dict
        Maps three group names to lists of column labels:

        * ``'modas'``      -- filled with each column's most frequent value.
        * ``'medianas'``   -- filled with each column's median.
        * ``'constantes'`` -- filled with the literal string ``'no-item'``.

    Attributes
    ----------
    imputers_ : dict
        One fitted ``SimpleImputer`` per group, created by :meth:`fit`.

    Notes
    -----
    The fill values are learned once in :meth:`fit` and reused by
    :meth:`transform`. Previously every ``transform`` call re-fitted the
    imputers on the data being transformed, so validation/test rows were
    imputed with their own statistics instead of the training statistics.
    """

    def __init__(self, fields):
        self.fields = fields

    def fit(self, X, y=None):
        """Learn the fill values of every column group from ``X``; return ``self``."""
        self.imputers_ = {
            group: imputer.fit(X.loc[:, self.fields[group]])
            for group, imputer in self._new_imputers().items()
        }
        return self

    def transform(self, X, y=None):
        """Return a copy of ``X`` with missing values replaced by the learned fills.

        Raises ``sklearn.exceptions.NotFittedError`` if :meth:`fit` has not run.
        """
        # Local import: the file does not import this helper at module level.
        from sklearn.utils.validation import check_is_fitted

        check_is_fitted(self, "imputers_")
        X_out = X.copy()
        for group, imputer in self.imputers_.items():
            columns = self.fields[group]
            X_out.loc[:, columns] = imputer.transform(X.loc[:, columns])
        return X_out

    def fit_transform(self, X, y=None):
        """Fit on ``X`` and return the imputed copy of that same ``X``."""
        return self.fit(X, y).transform(X)

    @staticmethod
    def _new_imputers():
        """Build the unfitted imputer of each group, in application order."""
        return {
            'modas': SimpleImputer(strategy='most_frequent'),
            'medianas': SimpleImputer(strategy='median'),
            'constantes': SimpleImputer(strategy='constant', fill_value='no-item'),
        }
def check_all_fields(fields, columns):
    """Print whether ``fields`` and ``columns`` contain exactly the same names.

    Prints ``"Fields ok"`` when every name appears on both sides. Otherwise
    prints a single list: first the names of ``fields`` missing from
    ``columns``, then the names of ``columns`` missing from ``fields``.
    Nothing is returned.
    """
    absent_from_columns = [name for name in fields if name not in columns]
    absent_from_fields = [name for name in columns if name not in fields]
    mismatches = absent_from_columns + absent_from_fields

    if not mismatches:
        print("Fields ok")
        return
    print(mismatches)
# Split the training columns by dtype: object columns go to the string group,
# everything else to the numeric group.
num_fields = X_train.select_dtypes(exclude=['object']).columns.tolist()
str_fields = X_train.select_dtypes(include=['object']).columns.tolist()

# MSSubClass has an integer dtype but is moved to the string group so that it
# is one-hot encoded instead of scaled.
num_fields.remove('MSSubClass')
str_fields.append('MSSubClass')

# Positions of each group's columns within X_train.columns.
idx_num_fields = [pos for pos, col in enumerate(X_train.columns) if col in num_fields]
idx_str_fields = [pos for pos, col in enumerate(X_train.columns) if col in str_fields]

# Both groupings (imputation and transformation) are checked against the
# actual columns of X_train; any name present on only one side is printed.
fields_ct_impute = modas + medianas + constantes
fields_ct_transform = num_fields + str_fields
check_all_fields(fields_ct_impute, X_train.columns)
check_all_fields(fields_ct_transform, X_train.columns)
Fields ok Fields ok
# One-hot encode the string group (categories unseen during fit are ignored)
# and min-max scale the numeric group.
ct_transform = ColumnTransformer(
    transformers=[
        ("ohe", OneHotEncoder(handle_unknown='ignore'), str_fields),
        ("nums", MinMaxScaler(), num_fields),
    ]
)

# Starting hyper-parameters of the regressor; the grid search overrides some.
params = dict(
    n_estimators=500,
    max_depth=10,
    min_samples_split=5,
    learning_rate=0.01,
    loss="squared_error",
    random_state=seed,
)

# Imputation -> encoding/scaling -> univariate feature selection -> model.
pipeline = Pipeline(
    steps=[
        ('imp', CustomImputer(fields={"medianas": medianas, "modas": modas, "constantes": constantes})),
        ('transform', ct_transform),
        # NOTE(review): chi2 is documented as a score function for
        # classification targets, while y here is the continuous SalePrice.
        # Consider f_regression or mutual_info_regression -- confirm intent.
        ('selection', SelectKBest(score_func=chi2, k=200)),
        ('model', GradientBoostingRegressor(**params)),
    ]
)
pipeline
Pipeline(steps=[('imp',
CustomImputer(fields={'constantes': ['Alley', 'BsmtQual',
'BsmtCond',
'BsmtExposure',
'BsmtFinType1',
'BsmtFinType2',
'FireplaceQu',
'GarageType',
'GarageFinish',
'GarageQual',
'GarageCond', 'PoolQC',
'Fence', 'MiscFeature'],
'medianas': ['LotFrontage', 'LotArea',
'OverallQual',
'OverallCond', 'YearBuilt',
'YearRemodAdd',
'MasVnrArea', 'BsmtFinSF1'...
'KitchenAbvGr',
'TotRmsAbvGrd', 'Fireplaces',
'GarageYrBlt', 'GarageCars',
'GarageArea', 'WoodDeckSF',
'OpenPorchSF',
'EnclosedPorch', '3SsnPorch', ...])])),
('selection',
SelectKBest(k=200,
score_func=<function chi2 at 0x000002C7871D3130>)),
('model',
GradientBoostingRegressor(learning_rate=0.01, max_depth=10,
min_samples_split=5,
n_estimators=500,
random_state=4209))])In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. Pipeline(steps=[('imp',
CustomImputer(fields={'constantes': ['Alley', 'BsmtQual',
'BsmtCond',
'BsmtExposure',
'BsmtFinType1',
'BsmtFinType2',
'FireplaceQu',
'GarageType',
'GarageFinish',
'GarageQual',
'GarageCond', 'PoolQC',
'Fence', 'MiscFeature'],
'medianas': ['LotFrontage', 'LotArea',
'OverallQual',
'OverallCond', 'YearBuilt',
'YearRemodAdd',
'MasVnrArea', 'BsmtFinSF1'...
'KitchenAbvGr',
'TotRmsAbvGrd', 'Fireplaces',
'GarageYrBlt', 'GarageCars',
'GarageArea', 'WoodDeckSF',
'OpenPorchSF',
'EnclosedPorch', '3SsnPorch', ...])])),
('selection',
SelectKBest(k=200,
score_func=<function chi2 at 0x000002C7871D3130>)),
('model',
GradientBoostingRegressor(learning_rate=0.01, max_depth=10,
min_samples_split=5,
n_estimators=500,
random_state=4209))])CustomImputer(fields={'constantes': ['Alley', 'BsmtQual', 'BsmtCond',
'BsmtExposure', 'BsmtFinType1',
'BsmtFinType2', 'FireplaceQu',
'GarageType', 'GarageFinish', 'GarageQual',
'GarageCond', 'PoolQC', 'Fence',
'MiscFeature'],
'medianas': ['LotFrontage', 'LotArea', 'OverallQual',
'OverallCond', 'YearBuilt', 'YearRemodAdd',
'MasVnrArea', 'BsmtFinSF1', 'BsmtFinSF2',
'BsmtUnfS...
'modas': ['MSSubClass', 'MSZoning', 'Street', 'LotShape',
'LandContour', 'Utilities', 'LotConfig',
'LandSlope', 'Neighborhood', 'Condition1',
'Condition2', 'BldgType', 'HouseStyle',
'RoofStyle', 'RoofMatl', 'Exterior1st',
'Exterior2nd', 'MasVnrType', 'ExterQual',
'ExterCond', 'Foundation', 'Heating',
'HeatingQC', 'CentralAir', 'Electrical',
'KitchenQual', 'Functional', 'PavedDrive',
'MoSold', 'SaleType', ...]})ColumnTransformer(transformers=[('ohe', OneHotEncoder(handle_unknown='ignore'),
['MSZoning', 'Street', 'Alley', 'LotShape',
'LandContour', 'Utilities', 'LotConfig',
'LandSlope', 'Neighborhood', 'Condition1',
'Condition2', 'BldgType', 'HouseStyle',
'RoofStyle', 'RoofMatl', 'Exterior1st',
'Exterior2nd', 'MasVnrType', 'ExterQual',
'ExterCond', 'Foundation', 'BsmtQual',
'Bsmt...
'OverallCond', 'YearBuilt', 'YearRemodAdd',
'MasVnrArea', 'BsmtFinSF1', 'BsmtFinSF2',
'BsmtUnfSF', 'TotalBsmtSF', '1stFlrSF',
'2ndFlrSF', 'LowQualFinSF', 'GrLivArea',
'BsmtFullBath', 'BsmtHalfBath', 'FullBath',
'HalfBath', 'BedroomAbvGr', 'KitchenAbvGr',
'TotRmsAbvGrd', 'Fireplaces', 'GarageYrBlt',
'GarageCars', 'GarageArea', 'WoodDeckSF',
'OpenPorchSF', 'EnclosedPorch', '3SsnPorch', ...])])['MSZoning', 'Street', 'Alley', 'LotShape', 'LandContour', 'Utilities', 'LotConfig', 'LandSlope', 'Neighborhood', 'Condition1', 'Condition2', 'BldgType', 'HouseStyle', 'RoofStyle', 'RoofMatl', 'Exterior1st', 'Exterior2nd', 'MasVnrType', 'ExterQual', 'ExterCond', 'Foundation', 'BsmtQual', 'BsmtCond', 'BsmtExposure', 'BsmtFinType1', 'BsmtFinType2', 'Heating', 'HeatingQC', 'CentralAir', 'Electrical', 'KitchenQual', 'Functional', 'FireplaceQu', 'GarageType', 'GarageFinish', 'GarageQual', 'GarageCond', 'PavedDrive', 'PoolQC', 'Fence', 'MiscFeature', 'SaleType', 'SaleCondition', 'MSSubClass']
OneHotEncoder(handle_unknown='ignore')
['LotFrontage', 'LotArea', 'OverallQual', 'OverallCond', 'YearBuilt', 'YearRemodAdd', 'MasVnrArea', 'BsmtFinSF1', 'BsmtFinSF2', 'BsmtUnfSF', 'TotalBsmtSF', '1stFlrSF', '2ndFlrSF', 'LowQualFinSF', 'GrLivArea', 'BsmtFullBath', 'BsmtHalfBath', 'FullBath', 'HalfBath', 'BedroomAbvGr', 'KitchenAbvGr', 'TotRmsAbvGrd', 'Fireplaces', 'GarageYrBlt', 'GarageCars', 'GarageArea', 'WoodDeckSF', 'OpenPorchSF', 'EnclosedPorch', '3SsnPorch', 'ScreenPorch', 'PoolArea', 'MiscVal', 'MoSold', 'YrSold']
MinMaxScaler()
SelectKBest(k=200, score_func=<function chi2 at 0x000002C7871D3130>)
GradientBoostingRegressor(learning_rate=0.01, max_depth=10, min_samples_split=5,
n_estimators=500, random_state=4209)
pipeline.get_params()
{'memory': None,
'steps': [('imp',
CustomImputer(fields={'constantes': ['Alley', 'BsmtQual', 'BsmtCond',
'BsmtExposure', 'BsmtFinType1',
'BsmtFinType2', 'FireplaceQu',
'GarageType', 'GarageFinish', 'GarageQual',
'GarageCond', 'PoolQC', 'Fence',
'MiscFeature'],
'medianas': ['LotFrontage', 'LotArea', 'OverallQual',
'OverallCond', 'YearBuilt', 'YearRemodAdd',
'MasVnrArea', 'BsmtFinSF1', 'BsmtFinSF2',
'BsmtUnfS...
'modas': ['MSSubClass', 'MSZoning', 'Street', 'LotShape',
'LandContour', 'Utilities', 'LotConfig',
'LandSlope', 'Neighborhood', 'Condition1',
'Condition2', 'BldgType', 'HouseStyle',
'RoofStyle', 'RoofMatl', 'Exterior1st',
'Exterior2nd', 'MasVnrType', 'ExterQual',
'ExterCond', 'Foundation', 'Heating',
'HeatingQC', 'CentralAir', 'Electrical',
'KitchenQual', 'Functional', 'PavedDrive',
'MoSold', 'SaleType', ...]})),
('transform',
ColumnTransformer(transformers=[('ohe', OneHotEncoder(handle_unknown='ignore'),
['MSZoning', 'Street', 'Alley', 'LotShape',
'LandContour', 'Utilities', 'LotConfig',
'LandSlope', 'Neighborhood', 'Condition1',
'Condition2', 'BldgType', 'HouseStyle',
'RoofStyle', 'RoofMatl', 'Exterior1st',
'Exterior2nd', 'MasVnrType', 'ExterQual',
'ExterCond', 'Foundation', 'BsmtQual',
'Bsmt...
'OverallCond', 'YearBuilt', 'YearRemodAdd',
'MasVnrArea', 'BsmtFinSF1', 'BsmtFinSF2',
'BsmtUnfSF', 'TotalBsmtSF', '1stFlrSF',
'2ndFlrSF', 'LowQualFinSF', 'GrLivArea',
'BsmtFullBath', 'BsmtHalfBath', 'FullBath',
'HalfBath', 'BedroomAbvGr', 'KitchenAbvGr',
'TotRmsAbvGrd', 'Fireplaces', 'GarageYrBlt',
'GarageCars', 'GarageArea', 'WoodDeckSF',
'OpenPorchSF', 'EnclosedPorch', '3SsnPorch', ...])])),
('selection',
SelectKBest(k=200, score_func=<function chi2 at 0x000002C7871D3130>)),
('model',
GradientBoostingRegressor(learning_rate=0.01, max_depth=10, min_samples_split=5,
n_estimators=500, random_state=4209))],
'verbose': False,
'imp': CustomImputer(fields={'constantes': ['Alley', 'BsmtQual', 'BsmtCond',
'BsmtExposure', 'BsmtFinType1',
'BsmtFinType2', 'FireplaceQu',
'GarageType', 'GarageFinish', 'GarageQual',
'GarageCond', 'PoolQC', 'Fence',
'MiscFeature'],
'medianas': ['LotFrontage', 'LotArea', 'OverallQual',
'OverallCond', 'YearBuilt', 'YearRemodAdd',
'MasVnrArea', 'BsmtFinSF1', 'BsmtFinSF2',
'BsmtUnfS...
'modas': ['MSSubClass', 'MSZoning', 'Street', 'LotShape',
'LandContour', 'Utilities', 'LotConfig',
'LandSlope', 'Neighborhood', 'Condition1',
'Condition2', 'BldgType', 'HouseStyle',
'RoofStyle', 'RoofMatl', 'Exterior1st',
'Exterior2nd', 'MasVnrType', 'ExterQual',
'ExterCond', 'Foundation', 'Heating',
'HeatingQC', 'CentralAir', 'Electrical',
'KitchenQual', 'Functional', 'PavedDrive',
'MoSold', 'SaleType', ...]}),
'transform': ColumnTransformer(transformers=[('ohe', OneHotEncoder(handle_unknown='ignore'),
['MSZoning', 'Street', 'Alley', 'LotShape',
'LandContour', 'Utilities', 'LotConfig',
'LandSlope', 'Neighborhood', 'Condition1',
'Condition2', 'BldgType', 'HouseStyle',
'RoofStyle', 'RoofMatl', 'Exterior1st',
'Exterior2nd', 'MasVnrType', 'ExterQual',
'ExterCond', 'Foundation', 'BsmtQual',
'Bsmt...
'OverallCond', 'YearBuilt', 'YearRemodAdd',
'MasVnrArea', 'BsmtFinSF1', 'BsmtFinSF2',
'BsmtUnfSF', 'TotalBsmtSF', '1stFlrSF',
'2ndFlrSF', 'LowQualFinSF', 'GrLivArea',
'BsmtFullBath', 'BsmtHalfBath', 'FullBath',
'HalfBath', 'BedroomAbvGr', 'KitchenAbvGr',
'TotRmsAbvGrd', 'Fireplaces', 'GarageYrBlt',
'GarageCars', 'GarageArea', 'WoodDeckSF',
'OpenPorchSF', 'EnclosedPorch', '3SsnPorch', ...])]),
'selection': SelectKBest(k=200, score_func=<function chi2 at 0x000002C7871D3130>),
'model': GradientBoostingRegressor(learning_rate=0.01, max_depth=10, min_samples_split=5,
n_estimators=500, random_state=4209),
'imp__fields': {'medianas': ['LotFrontage',
'LotArea',
'OverallQual',
'OverallCond',
'YearBuilt',
'YearRemodAdd',
'MasVnrArea',
'BsmtFinSF1',
'BsmtFinSF2',
'BsmtUnfSF',
'TotalBsmtSF',
'1stFlrSF',
'2ndFlrSF',
'LowQualFinSF',
'GrLivArea',
'BsmtFullBath',
'BsmtHalfBath',
'FullBath',
'HalfBath',
'BedroomAbvGr',
'KitchenAbvGr',
'TotRmsAbvGrd',
'Fireplaces',
'GarageYrBlt',
'GarageCars',
'GarageArea',
'WoodDeckSF',
'OpenPorchSF',
'EnclosedPorch',
'3SsnPorch',
'ScreenPorch',
'PoolArea',
'MiscVal',
'YrSold'],
'modas': ['MSSubClass',
'MSZoning',
'Street',
'LotShape',
'LandContour',
'Utilities',
'LotConfig',
'LandSlope',
'Neighborhood',
'Condition1',
'Condition2',
'BldgType',
'HouseStyle',
'RoofStyle',
'RoofMatl',
'Exterior1st',
'Exterior2nd',
'MasVnrType',
'ExterQual',
'ExterCond',
'Foundation',
'Heating',
'HeatingQC',
'CentralAir',
'Electrical',
'KitchenQual',
'Functional',
'PavedDrive',
'MoSold',
'SaleType',
'SaleCondition'],
'constantes': ['Alley',
'BsmtQual',
'BsmtCond',
'BsmtExposure',
'BsmtFinType1',
'BsmtFinType2',
'FireplaceQu',
'GarageType',
'GarageFinish',
'GarageQual',
'GarageCond',
'PoolQC',
'Fence',
'MiscFeature']},
'transform__n_jobs': None,
'transform__remainder': 'drop',
'transform__sparse_threshold': 0.3,
'transform__transformer_weights': None,
'transform__transformers': [('ohe',
OneHotEncoder(handle_unknown='ignore'),
['MSZoning',
'Street',
'Alley',
'LotShape',
'LandContour',
'Utilities',
'LotConfig',
'LandSlope',
'Neighborhood',
'Condition1',
'Condition2',
'BldgType',
'HouseStyle',
'RoofStyle',
'RoofMatl',
'Exterior1st',
'Exterior2nd',
'MasVnrType',
'ExterQual',
'ExterCond',
'Foundation',
'BsmtQual',
'BsmtCond',
'BsmtExposure',
'BsmtFinType1',
'BsmtFinType2',
'Heating',
'HeatingQC',
'CentralAir',
'Electrical',
'KitchenQual',
'Functional',
'FireplaceQu',
'GarageType',
'GarageFinish',
'GarageQual',
'GarageCond',
'PavedDrive',
'PoolQC',
'Fence',
'MiscFeature',
'SaleType',
'SaleCondition',
'MSSubClass']),
('nums',
MinMaxScaler(),
['LotFrontage',
'LotArea',
'OverallQual',
'OverallCond',
'YearBuilt',
'YearRemodAdd',
'MasVnrArea',
'BsmtFinSF1',
'BsmtFinSF2',
'BsmtUnfSF',
'TotalBsmtSF',
'1stFlrSF',
'2ndFlrSF',
'LowQualFinSF',
'GrLivArea',
'BsmtFullBath',
'BsmtHalfBath',
'FullBath',
'HalfBath',
'BedroomAbvGr',
'KitchenAbvGr',
'TotRmsAbvGrd',
'Fireplaces',
'GarageYrBlt',
'GarageCars',
'GarageArea',
'WoodDeckSF',
'OpenPorchSF',
'EnclosedPorch',
'3SsnPorch',
'ScreenPorch',
'PoolArea',
'MiscVal',
'MoSold',
'YrSold'])],
'transform__verbose': False,
'transform__verbose_feature_names_out': True,
'transform__ohe': OneHotEncoder(handle_unknown='ignore'),
'transform__nums': MinMaxScaler(),
'transform__ohe__categories': 'auto',
'transform__ohe__drop': None,
'transform__ohe__dtype': numpy.float64,
'transform__ohe__handle_unknown': 'ignore',
'transform__ohe__max_categories': None,
'transform__ohe__min_frequency': None,
'transform__ohe__sparse': True,
'transform__nums__clip': False,
'transform__nums__copy': True,
'transform__nums__feature_range': (0, 1),
'selection__k': 200,
'selection__score_func': <function sklearn.feature_selection._univariate_selection.chi2(X, y)>,
'model__alpha': 0.9,
'model__ccp_alpha': 0.0,
'model__criterion': 'friedman_mse',
'model__init': None,
'model__learning_rate': 0.01,
'model__loss': 'squared_error',
'model__max_depth': 10,
'model__max_features': None,
'model__max_leaf_nodes': None,
'model__min_impurity_decrease': 0.0,
'model__min_samples_leaf': 1,
'model__min_samples_split': 5,
'model__min_weight_fraction_leaf': 0.0,
'model__n_estimators': 500,
'model__n_iter_no_change': None,
'model__random_state': 4209,
'model__subsample': 1.0,
'model__tol': 0.0001,
'model__validation_fraction': 0.1,
'model__verbose': 0,
'model__warm_start': False}
# Exhaustive search over 4 * 4 * 3 * 3 = 144 combinations of the number of
# selected features and the regressor's depth, split size and tree count.
search = GridSearchCV(
    estimator=pipeline,
    param_grid=dict(
        selection__k=[120, 160, 200, 240],
        model__max_depth=[6, 8, 10, 12],
        model__min_samples_split=[2, 4, 6],
        model__n_estimators=[200, 400, 600],
    ),
    verbose=3,
)
search.fit(X_train, y_train)
Fitting 5 folds for each of 144 candidates, totalling 720 fits
GridSearchCV(estimator=Pipeline(steps=[('imp',
CustomImputer(fields={'constantes': ['Alley',
'BsmtQual',
'BsmtCond',
'BsmtExposure',
'BsmtFinType1',
'BsmtFinType2',
'FireplaceQu',
'GarageType',
'GarageFinish',
'GarageQual',
'GarageCond',
'PoolQC',
'Fence',
'MiscFeature'],
'medianas': ['LotFrontage',
'LotArea',
'OverallQual',
'OverallCond',
'YearBuilt',
'YearRemodAdd',
'M...
SelectKBest(k=200,
score_func=<function chi2 at 0x000002C7871D3130>)),
('model',
GradientBoostingRegressor(learning_rate=0.01,
max_depth=10,
min_samples_split=5,
n_estimators=500,
random_state=4209))]),
param_grid={'model__max_depth': [6, 8, 10, 12],
'model__min_samples_split': [2, 4, 6],
'model__n_estimators': [200, 400, 600],
'selection__k': [120, 160, 200, 240]},
verbose=3)In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. GridSearchCV(estimator=Pipeline(steps=[('imp',
CustomImputer(fields={'constantes': ['Alley',
'BsmtQual',
'BsmtCond',
'BsmtExposure',
'BsmtFinType1',
'BsmtFinType2',
'FireplaceQu',
'GarageType',
'GarageFinish',
'GarageQual',
'GarageCond',
'PoolQC',
'Fence',
'MiscFeature'],
'medianas': ['LotFrontage',
'LotArea',
'OverallQual',
'OverallCond',
'YearBuilt',
'YearRemodAdd',
'M...
SelectKBest(k=200,
score_func=<function chi2 at 0x000002C7871D3130>)),
('model',
GradientBoostingRegressor(learning_rate=0.01,
max_depth=10,
min_samples_split=5,
n_estimators=500,
random_state=4209))]),
param_grid={'model__max_depth': [6, 8, 10, 12],
'model__min_samples_split': [2, 4, 6],
'model__n_estimators': [200, 400, 600],
'selection__k': [120, 160, 200, 240]},
verbose=3)Pipeline(steps=[('imp',
CustomImputer(fields={'constantes': ['Alley', 'BsmtQual',
'BsmtCond',
'BsmtExposure',
'BsmtFinType1',
'BsmtFinType2',
'FireplaceQu',
'GarageType',
'GarageFinish',
'GarageQual',
'GarageCond', 'PoolQC',
'Fence', 'MiscFeature'],
'medianas': ['LotFrontage', 'LotArea',
'OverallQual',
'OverallCond', 'YearBuilt',
'YearRemodAdd',
'MasVnrArea', 'BsmtFinSF1'...
'KitchenAbvGr',
'TotRmsAbvGrd', 'Fireplaces',
'GarageYrBlt', 'GarageCars',
'GarageArea', 'WoodDeckSF',
'OpenPorchSF',
'EnclosedPorch', '3SsnPorch', ...])])),
('selection',
SelectKBest(k=200,
score_func=<function chi2 at 0x000002C7871D3130>)),
('model',
GradientBoostingRegressor(learning_rate=0.01, max_depth=10,
min_samples_split=5,
n_estimators=500,
random_state=4209))])CustomImputer(fields={'constantes': ['Alley', 'BsmtQual', 'BsmtCond',
'BsmtExposure', 'BsmtFinType1',
'BsmtFinType2', 'FireplaceQu',
'GarageType', 'GarageFinish', 'GarageQual',
'GarageCond', 'PoolQC', 'Fence',
'MiscFeature'],
'medianas': ['LotFrontage', 'LotArea', 'OverallQual',
'OverallCond', 'YearBuilt', 'YearRemodAdd',
'MasVnrArea', 'BsmtFinSF1', 'BsmtFinSF2',
'BsmtUnfS...
'modas': ['MSSubClass', 'MSZoning', 'Street', 'LotShape',
'LandContour', 'Utilities', 'LotConfig',
'LandSlope', 'Neighborhood', 'Condition1',
'Condition2', 'BldgType', 'HouseStyle',
'RoofStyle', 'RoofMatl', 'Exterior1st',
'Exterior2nd', 'MasVnrType', 'ExterQual',
'ExterCond', 'Foundation', 'Heating',
'HeatingQC', 'CentralAir', 'Electrical',
'KitchenQual', 'Functional', 'PavedDrive',
'MoSold', 'SaleType', ...]})ColumnTransformer(transformers=[('ohe', OneHotEncoder(handle_unknown='ignore'),
['MSZoning', 'Street', 'Alley', 'LotShape',
'LandContour', 'Utilities', 'LotConfig',
'LandSlope', 'Neighborhood', 'Condition1',
'Condition2', 'BldgType', 'HouseStyle',
'RoofStyle', 'RoofMatl', 'Exterior1st',
'Exterior2nd', 'MasVnrType', 'ExterQual',
'ExterCond', 'Foundation', 'BsmtQual',
'Bsmt...
'OverallCond', 'YearBuilt', 'YearRemodAdd',
'MasVnrArea', 'BsmtFinSF1', 'BsmtFinSF2',
'BsmtUnfSF', 'TotalBsmtSF', '1stFlrSF',
'2ndFlrSF', 'LowQualFinSF', 'GrLivArea',
'BsmtFullBath', 'BsmtHalfBath', 'FullBath',
'HalfBath', 'BedroomAbvGr', 'KitchenAbvGr',
'TotRmsAbvGrd', 'Fireplaces', 'GarageYrBlt',
'GarageCars', 'GarageArea', 'WoodDeckSF',
'OpenPorchSF', 'EnclosedPorch', '3SsnPorch', ...])])['MSZoning', 'Street', 'Alley', 'LotShape', 'LandContour', 'Utilities', 'LotConfig', 'LandSlope', 'Neighborhood', 'Condition1', 'Condition2', 'BldgType', 'HouseStyle', 'RoofStyle', 'RoofMatl', 'Exterior1st', 'Exterior2nd', 'MasVnrType', 'ExterQual', 'ExterCond', 'Foundation', 'BsmtQual', 'BsmtCond', 'BsmtExposure', 'BsmtFinType1', 'BsmtFinType2', 'Heating', 'HeatingQC', 'CentralAir', 'Electrical', 'KitchenQual', 'Functional', 'FireplaceQu', 'GarageType', 'GarageFinish', 'GarageQual', 'GarageCond', 'PavedDrive', 'PoolQC', 'Fence', 'MiscFeature', 'SaleType', 'SaleCondition', 'MSSubClass']
OneHotEncoder(handle_unknown='ignore')
['LotFrontage', 'LotArea', 'OverallQual', 'OverallCond', 'YearBuilt', 'YearRemodAdd', 'MasVnrArea', 'BsmtFinSF1', 'BsmtFinSF2', 'BsmtUnfSF', 'TotalBsmtSF', '1stFlrSF', '2ndFlrSF', 'LowQualFinSF', 'GrLivArea', 'BsmtFullBath', 'BsmtHalfBath', 'FullBath', 'HalfBath', 'BedroomAbvGr', 'KitchenAbvGr', 'TotRmsAbvGrd', 'Fireplaces', 'GarageYrBlt', 'GarageCars', 'GarageArea', 'WoodDeckSF', 'OpenPorchSF', 'EnclosedPorch', '3SsnPorch', 'ScreenPorch', 'PoolArea', 'MiscVal', 'MoSold', 'YrSold']
MinMaxScaler()
SelectKBest(k=200, score_func=<function chi2 at 0x000002C7871D3130>)
GradientBoostingRegressor(learning_rate=0.01, max_depth=10, min_samples_split=5,
n_estimators=500, random_state=4209)
train_score = search.score(X_train, y_train)
train_score
0.9591508955664606
# Score the fitted search on the held-out split and display the value.
test_score = search.score(X_test, y_test)
test_score
0.7827827279310412
# Record both scores in MLflow.
mlflow.log_metric("train_score", train_score)
mlflow.log_metric("test_score", test_score)
# Keep the winning hyper-parameter combination and display it.
best_params = search.best_params_
best_params
{'model__max_depth': 6,
'model__min_samples_split': 4,
'model__n_estimators': 600,
'selection__k': 240}
# Record the winning hyper-parameters.
mlflow.log_params(best_params)
# Log the name of the score function actually configured on the selector
# rather than a hard-coded literal, so the logged value cannot drift from the
# pipeline definition (it is 'chi2' for the pipeline as currently defined).
mlflow.log_param("sel_func", pipeline.named_steps["selection"].score_func.__name__)
'chi2'
# Serialize the best pipeline found by the search to disk, then attach the
# file to the MLflow run under the "model" artifact folder.
file_name = "House-sales.pkl"
with open(file_name, mode="wb") as model_file:
    pickle.dump(search.best_estimator_, model_file)
mlflow.log_artifact(file_name, artifact_path="model")
import sklearn

# writelines() writes the strings back to back without adding separators, so
# each entry carries its own trailing newline; otherwise the three versions
# end up fused on a single line.
version_lines = [
    f"La versión de pandas es: {pd.__version__}\n",
    f"La versión de sklearn es: {sklearn.__version__}\n",
    f"La versión de mlflow es: {mlflow.__version__}\n",
]
# Explicit UTF-8 so the accented text is written identically on every platform.
with open("versions.txt", "w", encoding="utf-8") as f:
    f.writelines(version_lines)

# Attach the version report to the run, then close the run.
mlflow.log_artifact("versions.txt")
mlflow.end_run()