RandomizedSearchCV
概要
これまでグリッドサーチでのパラメータチューニングしか行ったことがなかったので、RandomizedSearchCV によるランダムサーチを試してみた。
環境
データの準備
import inspect

import numpy as np
import pandas as pd
import xgboost as xgb
from scipy.stats import pearsonr
from sklearn import datasets
from sklearn.model_selection import KFold, RandomizedSearchCV, train_test_split

# Load the Boston housing regression dataset through the imported `datasets`
# module (a bare `load_boston()` is not in scope and raises NameError).
# NOTE: load_boston was removed in scikit-learn 1.2, so this script needs an
# older scikit-learn release (or a different dataset loader).
data = datasets.load_boston()

# Hold out 20% of the samples as a test set; the seed keeps the split stable.
x_train, x_test, y_train, y_test = train_test_split(
    data['data'], data['target'], test_size=0.2, random_state=0
)

# 5-fold cross-validation over contiguous (unshuffled) folds. `random_state`
# only has an effect when shuffle=True, and recent scikit-learn versions raise
# ValueError if it is set together with shuffle=False, so it is not passed.
kf = KFold(n_splits=5, shuffle=False)
モデルの作成
# Create an XGBoost regressor with default settings, then apply the fixed
# (non-tuned) parameters: squared-error regression objective.
model = xgb.XGBRegressor()
params = dict(objective='reg:squarederror')
model.set_params(**params)
クロスバリデーションの評価関数の定義
def cv_r_score(estimator, x, y):
    """Score a fitted estimator by the Pearson correlation of its predictions.

    The signature matches what scikit-learn expects from a callable passed
    as ``scoring``: ``scorer(estimator, X, y)``.

    Args:
        estimator: Fitted model exposing ``predict(x)``.
        x: Feature matrix passed to ``estimator.predict``.
        y: True targets, either 1-D or 2-D. For a multi-dimensional input
            only the first column is used.

    Returns:
        Pearson correlation coefficient between the true targets and the
        predictions (higher is better).
    """
    y_pred = estimator.predict(x)

    # The original code always did ``y.T[0]``, which reduces a 1-D target to
    # a single scalar and makes pearsonr fail. Only take the first column
    # when the target really has more than one dimension.
    y_true = np.asarray(y)
    if y_true.ndim > 1:
        y_true = y_true.T[0]

    r, _ = pearsonr(y_true, y_pred)
    return r
RandomizedSearchCV
# Search space: three candidate values for each of six XGBoost
# hyperparameters (3**6 = 729 possible combinations).
params = {
    'max_depth': [5, 10, 15],
    'min_child_weight': [1, 3, 6],
    'max_delta_step': [0, 4, 8],
    'gamma': [0.0, 0.1, 0.2],
    'subsample': [0.5, 0.7, 0.9],
    'colsample_bytree': [0.5, 0.7, 0.9],
}

# Evaluate only n_iter=10 randomly sampled combinations instead of the full
# grid, scoring each with the custom Pearson-r scorer on the KFold splits.
# random_state pins which combinations are sampled so that reruns are
# reproducible, consistent with the seeded train/test split.
rmcv = RandomizedSearchCV(
    model,
    params,
    cv=kf,
    scoring=cv_r_score,
    n_iter=10,
    random_state=0,
)
rmcv.fit(x_train, y_train)
結果の出力
# Print, for every sampled candidate, the value drawn for each tuned
# hyperparameter, followed by the mean cross-validated test score.
results = rmcv.cv_results_
for key, values in results.items():
    # 'params' holds the full dict per candidate; only the per-parameter
    # columns (keys containing 'param') are printed.
    if key != 'params' and 'param' in key:
        # Drop everything up to and including the first underscore
        # (e.g. 'param_max_depth' -> 'max_depth').
        name = key.partition('_')[2]
        print(f"{name}: {values}")
print(results['mean_test_score'])
subsample: [0.9 0.9 0.5 0.5 0.9 0.7 0.5 0.7 0.9 0.7] min_child_weight: [3 3 3 1 6 6 6 3 3 1] max_depth: [10 10 10 10 15 5 15 5 5 15] max_delta_step: [8 4 0 8 0 0 4 0 0 4] gamma: [0.0 0.1 0.1 0.0 0.1 0.0 0.0 0.0 0.0 0.2] colsample_bytree: [0.7 0.5 0.5 0.5 0.5 0.7 0.5 0.7 0.9 0.5] [0.89627005 0.89432932 0.89878338 0.88887008 0.89917085 0.90628873 0.8967743 0.90508957 0.90810282 0.88271599]