본문 바로가기

소소한/#Python

모델검증 train_test_split

from sklearn.model_selection import train_test_split

# Split the features X and target Y into train and test parts.
# test_size=0.2 holds out 20% of the rows for testing; the fixed random_state
# makes the shuffle reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X, Y, test_size=0.2, random_state=5
)

# Print the shape of every part, labelled by the split it belongs to
# (the labels were previously grouped by X/y, so X_test showed up as
# 'TrainSet' and y_train as 'testSet').
print('TrainSet', X_train.shape)
print('testSet', X_test.shape)

print('TrainSet', y_train.shape)
print('testSet', y_test.shape)

# Draw the train and test splits side by side in one 12x4 figure so their
# distributions can be compared visually. Both panels scatter the column at
# position 0 against the column at position 3.
# NOTE(review): assumes `plt` is matplotlib.pyplot, imported elsewhere, and
# that X_train / X_test support positional `.iloc` indexing; which features
# sit at positions 0 and 3 is not visible here.
plt.figure(figsize=(12, 4))

panels = (
    (121, X_train, 'Train Set'),
    (122, X_test, 'Test Set'),
)
for position, frame, heading in panels:
    plt.subplot(position)
    plt.scatter(frame.iloc[:, 0], frame.iloc[:, 3], alpha=0.8)
    plt.title(heading)

from xgboost import XGBRegressor

# Fit an XGBoost regressor with its default hyperparameters on the
# training split.
xgb_reg = XGBRegressor()
xgb_reg.fit(X_train, y_train)

# Report the model's score on the data it was fitted on and on the held-out
# data; a large gap between the two hints at overfitting.
# NOTE(review): for scikit-learn style regressors `score` is expected to be
# R^2 -- confirm against the XGBoost sklearn API docs.
for label, features, target in (
    ('Train score :', X_train, y_train),
    ('Test score:', X_test, y_test),
):
    print(label, xgb_reg.score(features, target))

 

 

# Collect the mean 5-fold cross-validation score on the training split for
# k = 1..49 and plot the resulting curve.
from sklearn.model_selection import cross_val_score

# The list was originally created as `val_socre`, so the append below raised
# NameError on the first iteration; the name now matches its uses.
val_score = []

# NOTE(review): `모델` ("model") is the post's placeholder for the estimator
# and must be defined before this runs. `k` is not used inside the loop, so
# unless the estimator is built from `k` (e.g. a hyperparameter sweep) each
# iteration cross-validates the same model -- confirm the intended estimator.
for k in range(1, 50):
    score = cross_val_score(모델, X_train, y_train, cv=5).mean()
    val_score.append(score)

# The x axis is the list index, i.e. k - 1.
plt.plot(val_score)

 

 

https://www.youtube.com/watch?v=w_bLGK4Pteo