In [1]:
import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder
from sklearn.metrics import mean_squared_error, r2_score
import seaborn as sns
import lightgbm as lgb
파일 불러오기
In [2]:
# Load the labelled dataset; the CSV file is read with the CP949 encoding.
label = pd.read_csv(filepath_or_buffer="label.csv", encoding="cp949")
In [3]:
# Predictor columns: gender, the `c_*` facial-measurement columns, and age.
raw_x = label[[
    'gender',
    'c_eye_left_size',
    'c_eye_right_size',
    'c_eye_left_width',
    'c_eye_right_width',
    'c_eye_nose_left_align',
    'c_eye_nose_right_align',
    'c_nose_root_width',
    'c_nose_alar_width',
    'c_nose_height',
    'c_eyebrow_left_width',
    'c_eyebrow_right_width',
    'c_lip_upper_height',
    'c_lip_under_height',
    'c_lip_width',
    'c_brow_eye_left_inner_gap',
    'c_brow_eye_right_inner_gap',
    'c_brow_eye_left_outer_gap',
    'c_brow_eye_right_outer_gap',
    'c_eyebrow_balance_left',
    'c_eyebrow_balance_right',
    'c_alar_left_width_shape',
    'c_alar_right_width_shape',
    'c_alar_left_height_shape',
    'c_alar_right_height_shape',
    'c_lip_ratio',
    'c_ratio_vertical',
    'c_ratio_horizontal',
    'c_lip_nose_gap',
    'c_brow_nose_left_gap',
    'c_brow_nose_right_gap',
    'age',
]]

# Regression target, kept as a one-column DataFrame.
raw_y = label[['score']]
성별의 경우 카테고리로 전환
In [4]:
# Cast gender to a pandas categorical dtype.
# raw_x was created by selecting columns from `label`, so assigning into it
# in place triggers pandas' SettingWithCopyWarning. `assign` builds a new
# frame instead, which also keeps this cell safe to re-run.
raw_x = raw_x.assign(gender=raw_x['gender'].astype('category'))
In [5]:
# Hold out 20% of the rows for testing; the fixed random_state makes the
# split repeatable across runs.
train_x, test_x, train_y, test_y = train_test_split(
    raw_x,
    raw_y,
    random_state=42,
    test_size=0.2,
)
lightgbm 데이터셋으로 변환
In [6]:
# Wrap the training split in a LightGBM Dataset.
train_ds = lgb.Dataset(train_x, label=train_y)
# The evaluation Dataset names the training Dataset as its reference, as the
# LightGBM documentation recommends for validation data, so that both are
# preprocessed consistently.
test_ds = lgb.Dataset(test_x, label=test_y, reference=train_ds)
파라미터 세팅
In [7]:
# LightGBM training configuration, grouped by purpose.
params = dict(
    # Boosting setup: gradient-boosted trees on a regression objective.
    learning_rate=0.01,
    max_depth=16,
    boosting='gbdt',
    objective='regression',
    # Evaluation: mean squared error, also requested for the training data.
    metric='mse',
    is_training_metric=True,
    # Tree size and feature/row subsampling settings.
    num_leaves=144,
    feature_fraction=0.9,
    bagging_fraction=0.7,
    bagging_freq=5,
    # Fixed seed for repeatable runs.
    seed=2018,
)
훈련
In [8]:
# Train for up to 1000 boosting rounds, evaluating on test_ds as training proceeds.
# Early stopping (patience of 100 rounds) and logging (every 100 rounds) are
# supplied as callbacks: the `early_stopping_rounds` and `verbose_eval`
# keyword arguments were removed from lgb.train in LightGBM 4.0.
# NOTE(review): test_ds drives early stopping here and is also the data the
# metrics are reported on later, so those scores are optimistic. Consider
# carving out a separate validation split for early stopping.
model = lgb.train(
    params,
    train_ds,
    num_boost_round=1000,
    valid_sets=[test_ds],
    callbacks=[
        lgb.early_stopping(stopping_rounds=100),
        lgb.log_evaluation(period=100),
    ],
)
In [9]:
# Score both splits with the trained booster (train first, then test).
predict_train, predict_test = (
    model.predict(features) for features in (train_x, test_x)
)
에러 측정
In [10]:
# Held-out error metrics: mean squared error and coefficient of determination.
mse = mean_squared_error(y_true=test_y, y_pred=predict_test)
r2 = r2_score(y_true=test_y, y_pred=predict_test)
In [11]:
# Report each metric on its own line, caption first.
for caption, value in (('Mean squared error: ', mse), ('R2 score: ', r2)):
    print(caption, value)
In [12]:
# Place the true scores (index reset to 0..n-1) next to the predictions so
# the two line up row by row.
final_result = pd.concat(
    objs=[test_y.reset_index(drop=True), pd.DataFrame(data=predict_test)],
    axis='columns',
)
In [13]:
# Name the two columns: ground truth first, model output second.
final_result.columns = pd.Index(['label', 'predict'])
fit 결과 확인
In [14]:
# Scatter of predicted vs. actual score with a fitted regression line.
sns.regplot(data=final_result, x='label', y='predict')
Out[14]: