Skip to content
Rain Hu's Workspace
Go back

[AI] 迴歸問題

Rain Hu

認識波士頓住房價資料集

查看特徵分布

準備資料

x' = \frac{x-\mu}{\sigma}
# Feature-wise standardization: subtract the per-column mean and divide by
# the per-column standard deviation, both computed along axis 0 of the
# training data.
mean = train_data.mean(axis=0)
std = train_data.std(axis=0)

# Both splits are scaled in place with the *training* statistics; the test
# data never contributes to mean/std.
for dataset in (train_data, test_data):
    dataset -= mean
    dataset /= std

建立模型

from keras import models
from keras import layers

def build_model():
    """Build and compile a small fully connected regression network.

    The network stacks two 64-unit ReLU ``Dense`` layers followed by a
    single-unit ``Dense`` layer with no activation, so the output is an
    unbounded scalar. It is compiled with the ``rmsprop`` optimizer, mean
    squared error (``mse``) as the loss, and mean absolute error (``mae``)
    as the reported metric.

    Returns:
        A freshly constructed, compiled model. Each call creates a new
        model object.
    """
    # Use the imported `models` module: the bare name `keras` is not
    # brought into scope by `from keras import models` / `layers`.
    model = models.Sequential([
        layers.Dense(64, activation='relu'),
        layers.Dense(64, activation='relu'),
        layers.Dense(1)  # linear output for scalar regression
    ])
    model.compile(optimizer='rmsprop', loss='mse', metrics=['mae'])
    return model

K-fold 驗證

\begin{array}{cccccccc} &\text{part 1}&\text{part 2}&\text{part 3}&\text{part 4}&\text{part 5}\\\\ \hline \text{1st fold} & \boxed{\red{\text{驗證}}} & \boxed{\text{訓練}} & \boxed{\text{訓練}} & \boxed{\text{訓練}} & \boxed{\text{訓練}} & \rightarrow & \text{測試分數 \\#1}\\\\ \text{2nd fold} & \boxed{\text{訓練}} & \boxed{\red{\text{驗證}}} & \boxed{\text{訓練}} & \boxed{\text{訓練}} & \boxed{\text{訓練}} & \rightarrow & \text{測試分數 \\#2}\\\\ \text{3rd fold} & \boxed{\text{訓練}} & \boxed{\text{訓練}} & \boxed{\red{\text{驗證}}} & \boxed{\text{訓練}} & \boxed{\text{訓練}} & \rightarrow & \text{測試分數 \\#3}\\\\ \text{4th fold} & \boxed{\text{訓練}} & \boxed{\text{訓練}} & \boxed{\text{訓練}} & \boxed{\red{\text{驗證}}} & \boxed{\text{訓練}} & \rightarrow & \text{測試分數 \\#4}\\\\ \text{5th fold} & \boxed{\text{訓練}} & \boxed{\text{訓練}} & \boxed{\text{訓練}} & \boxed{\text{訓練}} & \boxed{\red{\text{驗證}}} & \rightarrow & \text{測試分數 \\#5} \end{array}
from os import kill
# K-fold cross-validation: each of the k folds takes one turn as the
# validation split while all remaining samples train a fresh model.
k = 5
n = len(train_data) // k  # samples per fold (integer division)
epochs = 100
scores = []
for i in range(k):
    print('Processing fold #', i)
    start, stop = i * n, (i + 1) * n

    # Validation split for this fold: rows [start, stop).
    val_data = train_data[start:stop]
    val_targets = train_targets[start:stop]

    # Training split: everything before and after the validation rows.
    # Rows past k * n (the division remainder) always land here.
    partial_train_data = np.concatenate(
        (train_data[:start], train_data[stop:]),
        axis=0,
    )
    partial_train_targets = np.concatenate(
        (train_targets[:start], train_targets[stop:]),
        axis=0,
    )

    # A new model is built for every fold.
    model = build_model()
    model.fit(
        partial_train_data,
        partial_train_targets,
        epochs=epochs,
        batch_size=16,
        verbose=0,
    )
    val_mse, val_mae = model.evaluate(val_data, val_targets, verbose=0)
    scores.append(val_mae)

# Per-fold validation MAE, then the average across folds.
print(scores)
print(np.mean(scores))

> [1.720955491065979, 2.8763468265533447, 2.1907858848571777, 2.566359043121338, 2.54329252243042]
> 2.379547953605652
# Retrain a fresh model on the entire training set.
final_model = build_model()
final_history = final_model.fit(
    train_data, train_targets,
    # Use the best epoch count found during validation.
    # NOTE(review): `best_epoch` is not defined in the visible code; it must
    # be assigned before this point -- confirm where it comes from.
    epochs=best_epoch,
    batch_size=16,
    verbose=1
)
# Predict with the model that was just trained on all the data; `model`
# would be the leftover model from the last K-fold iteration.
predictions = final_model.predict(test_data)

Share this post on:

Previous
[AI] 普適化
Next
[AI] 多元分類問題