In [4]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import plotly.express as px
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_percentage_error, mean_squared_error, mean_absolute_error
import tensorflow as tf
from keras import Model
from keras.layers import Input, Dense, Dropout, LSTM
from keras.callbacks import EarlyStopping
# 忽略警告
import warnings
warnings.filterwarnings('ignore')
# 数据加载与基本检查
df = pd.read_csv('Gold Price (2013-2022).csv')
df['Date'] = pd.to_datetime(df['Date'])
df.sort_values(by='Date', ascending=True, inplace=True)
df.reset_index(drop=True, inplace=True)
# 删除不必要的特征
df.drop(['Vol.', 'Change %'], axis=1, inplace=True)
# 去除逗号并转换数据类型
num_cols = df.columns.drop(['Date'])
df[num_cols] = df[num_cols].replace({',': ''}, regex=True).astype('float64')
# 数据可视化
fig = px.line(y=df.Price, x=df.Date, title="Gold Price History Data")
fig.update_traces(line_color='black')
fig.update_layout(
xaxis_title="Date", yaxis_title="Price",
plot_bgcolor='rgba(255,223,0,0.8)'
)
fig.show()
# 数据分割:训练集和测试集
test_size = df[df.Date.dt.year == 2022].shape[0]
train_data = df.Price[:-test_size]
test_data = df.Price[-test_size-60:] # 测试集包含滑动窗口部分
# 数据归一化
scaler = MinMaxScaler()
train_scaled = scaler.fit_transform(train_data.values.reshape(-1, 1))
test_scaled = scaler.transform(test_data.values.reshape(-1, 1))
# 滑动窗口构建数据
window_size = 60
# 训练集
X_train, y_train = [], []
for i in range(window_size, len(train_scaled)):
X_train.append(train_scaled[i-window_size:i, 0])
y_train.append(train_scaled[i, 0])
# 测试集
X_test, y_test = [], []
for i in range(window_size, len(test_scaled)):
X_test.append(test_scaled[i-window_size:i, 0])
y_test.append(test_scaled[i, 0])
# 转换为Numpy数组
X_train, y_train = np.array(X_train), np.array(y_train)
X_test, y_test = np.array(X_test), np.array(y_test)
# 调整输入形状
X_train = np.reshape(X_train, (X_train.shape[0], X_train.shape[1], 1))
X_test = np.reshape(X_test, (X_test.shape[0], X_test.shape[1], 1))
# 定义优化的LSTM模型
def define_optimized_model():
input_layer = Input(shape=(window_size, 1))
x = LSTM(64, return_sequences=True, activation='tanh')(input_layer)
x = Dropout(0.2)(x)
x = LSTM(32, return_sequences=False, activation='tanh')(x)
x = Dropout(0.2)(x)
output_layer = Dense(1, activation='linear')(x)
model = Model(inputs=input_layer, outputs=output_layer)
model.compile(loss='mean_squared_error', optimizer='adam')
return model
# 模型训练
model = define_optimized_model()
early_stop = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)
history = model.fit(
X_train, y_train,
epochs=100, batch_size=32,
validation_split=0.2, verbose=1,
callbacks=[early_stop]
)
# 模型评估
y_pred_scaled = model.predict(X_test)
y_test_true = scaler.inverse_transform(y_test.reshape(-1, 1))
y_test_pred = scaler.inverse_transform(y_pred_scaled)
# 计算评估指标
mse = mean_squared_error(y_test_true, y_test_pred)
mae = mean_absolute_error(y_test_true, y_test_pred)
mape = mean_absolute_percentage_error(y_test_true, y_test_pred)
r2 = 1 - (np.sum((y_test_true - y_test_pred) ** 2) / np.sum((y_test_true - np.mean(y_test_true)) ** 2))
print(f"Test MSE: {mse:.4f}")
print(f"Test MAE: {mae:.4f}")
print(f"Test MAPE: {mape:.2%}")
print(f"Test R²: {r2:.4f}")
# 可视化模型预测效果
plt.figure(figsize=(15, 6), dpi=150)
plt.plot(df['Date'].iloc[:-test_size], scaler.inverse_transform(train_scaled), color='black', label='Training Data')
plt.plot(df['Date'].iloc[-test_size:], y_test_true, color='blue', label='Actual Test Data')
plt.plot(df['Date'].iloc[-test_size:], y_test_pred, color='red', label='Predicted Test Data')
plt.title('Model Performance on Gold Price Prediction', fontsize=15)
plt.xlabel('Date', fontsize=12)
plt.ylabel('Price', fontsize=12)
plt.legend(loc='upper left', prop={'size': 12})
plt.grid(color='gray', linestyle='--')
plt.show()
Epoch 1/100 57/57 [==============================] - 4s 34ms/step - loss: 0.0082 - val_loss: 0.0027 Epoch 2/100 57/57 [==============================] - 1s 25ms/step - loss: 0.0018 - val_loss: 0.0041 Epoch 3/100 57/57 [==============================] - 1s 24ms/step - loss: 0.0016 - val_loss: 0.0015 Epoch 4/100 57/57 [==============================] - 1s 24ms/step - loss: 0.0014 - val_loss: 0.0017 Epoch 5/100 57/57 [==============================] - 1s 24ms/step - loss: 0.0016 - val_loss: 0.0045 Epoch 6/100 57/57 [==============================] - 1s 26ms/step - loss: 0.0012 - val_loss: 0.0044 Epoch 7/100 57/57 [==============================] - 1s 25ms/step - loss: 0.0012 - val_loss: 0.0028 Epoch 8/100 57/57 [==============================] - 1s 24ms/step - loss: 0.0011 - val_loss: 0.0019 Epoch 9/100 57/57 [==============================] - 1s 25ms/step - loss: 0.0012 - val_loss: 0.0017 Epoch 10/100 57/57 [==============================] - 1s 26ms/step - loss: 0.0011 - val_loss: 9.5650e-04 Epoch 11/100 57/57 [==============================] - 2s 28ms/step - loss: 0.0011 - val_loss: 8.8959e-04 Epoch 12/100 57/57 [==============================] - 2s 28ms/step - loss: 0.0011 - val_loss: 8.6934e-04 Epoch 13/100 57/57 [==============================] - 2s 28ms/step - loss: 9.3384e-04 - val_loss: 0.0015 Epoch 14/100 57/57 [==============================] - 2s 29ms/step - loss: 9.5245e-04 - val_loss: 0.0010 Epoch 15/100 57/57 [==============================] - 2s 28ms/step - loss: 8.8055e-04 - val_loss: 7.6218e-04 Epoch 16/100 57/57 [==============================] - 2s 29ms/step - loss: 9.5084e-04 - val_loss: 0.0011 Epoch 17/100 57/57 [==============================] - 2s 29ms/step - loss: 8.6539e-04 - val_loss: 0.0012 Epoch 18/100 57/57 [==============================] - 2s 29ms/step - loss: 8.0801e-04 - val_loss: 0.0012 Epoch 19/100 57/57 [==============================] - 2s 29ms/step - loss: 7.3625e-04 - val_loss: 7.2624e-04 Epoch 20/100 57/57 [==============================] - 2s 28ms/step - loss: 7.4719e-04 - val_loss: 7.2584e-04 Epoch 21/100 57/57 [==============================] - 2s 29ms/step - loss: 7.7573e-04 - val_loss: 9.8627e-04 Epoch 22/100 57/57 [==============================] - 2s 28ms/step - loss: 6.2333e-04 - val_loss: 9.3673e-04 Epoch 23/100 57/57 [==============================] - 2s 29ms/step - loss: 6.5290e-04 - val_loss: 0.0025 Epoch 24/100 57/57 [==============================] - 2s 29ms/step - loss: 6.5372e-04 - val_loss: 0.0042 Epoch 25/100 57/57 [==============================] - 2s 29ms/step - loss: 6.5522e-04 - val_loss: 0.0017 Epoch 26/100 57/57 [==============================] - 2s 28ms/step - loss: 6.8554e-04 - val_loss: 0.0030 Epoch 27/100 57/57 [==============================] - 2s 28ms/step - loss: 5.9007e-04 - val_loss: 0.0011 Epoch 28/100 57/57 [==============================] - 2s 28ms/step - loss: 5.8071e-04 - val_loss: 9.5184e-04 Epoch 29/100 57/57 [==============================] - 2s 28ms/step - loss: 6.0606e-04 - val_loss: 5.8515e-04 Epoch 30/100 57/57 [==============================] - 2s 28ms/step - loss: 5.5738e-04 - val_loss: 5.8165e-04 Epoch 31/100 57/57 [==============================] - 2s 28ms/step - loss: 5.3708e-04 - val_loss: 5.5287e-04 Epoch 32/100 57/57 [==============================] - 2s 28ms/step - loss: 4.7901e-04 - val_loss: 5.2770e-04 Epoch 33/100 57/57 [==============================] - 2s 28ms/step - loss: 4.9871e-04 - val_loss: 7.4646e-04 Epoch 34/100 57/57 [==============================] - 2s 28ms/step - loss: 4.9541e-04 - val_loss: 8.9639e-04 Epoch 35/100 57/57 [==============================] - 2s 28ms/step - loss: 4.5409e-04 - val_loss: 0.0015 Epoch 36/100 57/57 [==============================] - 2s 28ms/step - loss: 4.2745e-04 - val_loss: 0.0010 Epoch 37/100 57/57 [==============================] - 2s 28ms/step - loss: 4.5775e-04 - val_loss: 0.0019 Epoch 38/100 57/57 [==============================] - 2s 29ms/step - loss: 4.6556e-04 - val_loss: 5.7972e-04 Epoch 39/100 57/57 [==============================] - 2s 28ms/step - loss: 4.4506e-04 - val_loss: 6.3251e-04 Epoch 40/100 57/57 [==============================] - 2s 28ms/step - loss: 4.3470e-04 - val_loss: 4.8006e-04 Epoch 41/100 57/57 [==============================] - 2s 28ms/step - loss: 3.9713e-04 - val_loss: 9.8011e-04 Epoch 42/100 57/57 [==============================] - 2s 29ms/step - loss: 4.3144e-04 - val_loss: 0.0030 Epoch 43/100 57/57 [==============================] - 2s 29ms/step - loss: 4.4934e-04 - val_loss: 5.1773e-04 Epoch 44/100 57/57 [==============================] - 2s 29ms/step - loss: 4.3007e-04 - val_loss: 5.0296e-04 Epoch 45/100 57/57 [==============================] - 2s 30ms/step - loss: 3.9985e-04 - val_loss: 0.0016 Epoch 46/100 57/57 [==============================] - 2s 31ms/step - loss: 4.2366e-04 - val_loss: 0.0011 Epoch 47/100 57/57 [==============================] - 2s 29ms/step - loss: 3.9213e-04 - val_loss: 0.0011 Epoch 48/100 57/57 [==============================] - 2s 26ms/step - loss: 4.0857e-04 - val_loss: 6.3791e-04 Epoch 49/100 57/57 [==============================] - 1s 25ms/step - loss: 3.8211e-04 - val_loss: 6.1667e-04 Epoch 50/100 57/57 [==============================] - 1s 25ms/step - loss: 3.7417e-04 - val_loss: 6.2659e-04 9/9 [==============================] - 1s 8ms/step Test MSE: 374.8673 Test MAE: 15.0888 Test MAPE: 0.84% Test R²: 0.9548
In [10]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import plotly.express as px
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_percentage_error, mean_squared_error
import tensorflow as tf
from keras import Model
from keras.layers import Input, Dense, Dropout, LSTM
from keras.callbacks import EarlyStopping
import warnings
warnings.filterwarnings('ignore')
# === 数据加载与预处理 ===
df = pd.read_csv('Gold Price (2013-2022).csv')
df.drop(['Vol.', 'Change %'], axis=1, inplace=True)
# 转换日期格式并排序
df['Date'] = pd.to_datetime(df['Date'])
df.sort_values(by='Date', ascending=True, inplace=True)
df.reset_index(drop=True, inplace=True)
# 去除“,”符号并转换为float
NumCols = df.columns.drop(['Date'])
df[NumCols] = df[NumCols].replace({',': ''}, regex=True).astype('float64')
# 缺失值检测
if df.isnull().sum().sum() > 0:
print("数据中存在缺失值!请检查。")
else:
print("数据无缺失值。")
# 可视化黄金价格
fig = px.line(y=df.Price, x=df.Date, title="Gold Price History Data")
fig.update_traces(line_color='black')
fig.update_layout(plot_bgcolor='rgba(255,223,0,0.8)', xaxis_title="Date", yaxis_title="Scaled Price")
fig.show()
# === 数据切分 ===
test_size = df[df.Date.dt.year == 2022].shape[0]
scaler = MinMaxScaler()
scaler.fit(df.Price.values.reshape(-1, 1))
train_data = scaler.transform(df.Price[:-test_size].values.reshape(-1, 1))
test_data = scaler.transform(df.Price[-test_size - 60:].values.reshape(-1, 1))
# 滑动窗口构造
window_size = 60
X_train, y_train, X_test, y_test = [], [], [], []
for i in range(window_size, len(train_data)):
X_train.append(train_data[i - window_size:i, 0])
y_train.append(train_data[i, 0])
for i in range(window_size, len(test_data)):
X_test.append(test_data[i - window_size:i, 0])
y_test.append(test_data[i, 0])
X_train, y_train = np.array(X_train), np.array(y_train)
X_test, y_test = np.array(X_test), np.array(y_test)
# 调整形状
X_train = X_train.reshape((X_train.shape[0], X_train.shape[1], 1))
X_test = X_test.reshape((X_test.shape[0], X_test.shape[1], 1))
y_train = y_train.reshape((-1, 1))
y_test = y_test.reshape((-1, 1))
# === 模型构建 ===
def define_model():
input1 = Input(shape=(window_size, 1))
x = LSTM(units=64, return_sequences=True)(input1)
x = Dropout(0.2)(x)
x = LSTM(units=32)(x) # 减少过拟合风险
x = Dropout(0.2)(x)
dnn_output = Dense(1)(x) # 改用线性激活函数
model = Model(inputs=input1, outputs=[dnn_output])
model.compile(loss='mean_squared_error', optimizer='Adam') # 更适合回归的优化器
return model
model = define_model()
# 训练
early_stopping = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)
history = model.fit(X_train, y_train, epochs=100, batch_size=32, validation_split=0.2,
callbacks=[early_stopping], verbose=1)
# === 可视化结果 ===
y_test_true = scaler.inverse_transform(y_test)
y_test_pred = scaler.inverse_transform(y_pred)
plt.figure(figsize=(15, 6))
plt.plot(df['Date'].iloc[:-test_size], scaler.inverse_transform(train_data), color='black', lw=2)
plt.plot(df['Date'].iloc[-test_size:], y_test_true, color='blue', lw=2)
plt.plot(df['Date'].iloc[-test_size:], y_test_pred, color='red', lw=2)
plt.title('Model Performance on Gold Price Prediction', fontsize=15)
plt.xlabel('Date', fontsize=12)
plt.ylabel('Price', fontsize=12)
plt.legend(['Training Data', 'Actual Test Data', 'Predicted Test Data'], loc='upper left', prop={'size': 15})
plt.grid(color='white')
plt.show()
# === 模型评估 ===
y_pred = model.predict(X_test)
MAPE = mean_absolute_percentage_error(y_test, y_pred)
MSE = mean_squared_error(y_test, y_pred)
RMSE = np.sqrt(MSE)
print(f"MAPE: {MAPE * 100:.2f}%")
print(f"RMSE: {RMSE:.2f}")
MAE = np.mean(np.abs(y_test - y_pred))
print(f"MAE: {MAE:.2f}")
from sklearn.metrics import r2_score
R2 = r2_score(y_test, y_pred)
print(f"R² Score: {R2:.2f}")
SMAPE = np.mean(2 * np.abs(y_test - y_pred) / (np.abs(y_test) + np.abs(y_pred))) * 100
print(f"SMAPE: {SMAPE:.2f}%")
from sklearn.metrics import explained_variance_score
EVS = explained_variance_score(y_test, y_pred)
print(f"Explained Variance Score: {EVS:.2f}")
residuals = y_test - y_pred
plt.figure(figsize=(10, 5))
plt.hist(residuals, bins=50, color='purple', alpha=0.7)
plt.title('Residual Distribution')
plt.xlabel('Residuals')
plt.ylabel('Frequency')
plt.show()
数据无缺失值。
Epoch 1/100 57/57 [==============================] - 4s 32ms/step - loss: 0.0074 - val_loss: 0.0026 Epoch 2/100 57/57 [==============================] - 1s 24ms/step - loss: 0.0017 - val_loss: 0.0030 Epoch 3/100 57/57 [==============================] - 1s 24ms/step - loss: 0.0016 - val_loss: 0.0019 Epoch 4/100 57/57 [==============================] - 1s 24ms/step - loss: 0.0016 - val_loss: 0.0014 Epoch 5/100 57/57 [==============================] - 1s 24ms/step - loss: 0.0014 - val_loss: 0.0019 Epoch 6/100 57/57 [==============================] - 1s 24ms/step - loss: 0.0015 - val_loss: 0.0097 Epoch 7/100 57/57 [==============================] - 1s 24ms/step - loss: 0.0013 - val_loss: 0.0017 Epoch 8/100 57/57 [==============================] - 1s 24ms/step - loss: 0.0012 - val_loss: 0.0040 Epoch 9/100 57/57 [==============================] - 1s 23ms/step - loss: 0.0011 - val_loss: 0.0014 Epoch 10/100 57/57 [==============================] - 1s 24ms/step - loss: 0.0011 - val_loss: 0.0027 Epoch 11/100 57/57 [==============================] - 1s 24ms/step - loss: 0.0011 - val_loss: 0.0013 Epoch 12/100 57/57 [==============================] - 1s 24ms/step - loss: 0.0011 - val_loss: 0.0012 Epoch 13/100 57/57 [==============================] - 1s 24ms/step - loss: 9.5916e-04 - val_loss: 0.0029 Epoch 14/100 57/57 [==============================] - 1s 24ms/step - loss: 9.7136e-04 - val_loss: 0.0019 Epoch 15/100 57/57 [==============================] - 1s 24ms/step - loss: 0.0010 - val_loss: 0.0026 Epoch 16/100 57/57 [==============================] - 1s 24ms/step - loss: 8.5942e-04 - val_loss: 0.0035 Epoch 17/100 57/57 [==============================] - 1s 24ms/step - loss: 8.0067e-04 - val_loss: 0.0011 Epoch 18/100 57/57 [==============================] - 1s 24ms/step - loss: 7.9242e-04 - val_loss: 7.1610e-04 Epoch 19/100 57/57 [==============================] - 1s 24ms/step - loss: 7.2196e-04 - val_loss: 8.6864e-04 Epoch 20/100 57/57 [==============================] - 1s 24ms/step - loss: 7.6009e-04 - val_loss: 6.9524e-04 Epoch 21/100 57/57 [==============================] - 1s 24ms/step - loss: 7.4915e-04 - val_loss: 0.0013 Epoch 22/100 57/57 [==============================] - 1s 24ms/step - loss: 7.0749e-04 - val_loss: 0.0036 Epoch 23/100 57/57 [==============================] - 1s 23ms/step - loss: 6.6179e-04 - val_loss: 0.0012 Epoch 24/100 57/57 [==============================] - 1s 24ms/step - loss: 6.3654e-04 - val_loss: 7.1714e-04 Epoch 25/100 57/57 [==============================] - 1s 24ms/step - loss: 5.8780e-04 - val_loss: 6.5300e-04 Epoch 26/100 57/57 [==============================] - 1s 24ms/step - loss: 5.6962e-04 - val_loss: 8.3446e-04 Epoch 27/100 57/57 [==============================] - 1s 24ms/step - loss: 6.0373e-04 - val_loss: 7.6859e-04 Epoch 28/100 57/57 [==============================] - 1s 25ms/step - loss: 5.4184e-04 - val_loss: 5.7977e-04 Epoch 29/100 57/57 [==============================] - 1s 24ms/step - loss: 5.6801e-04 - val_loss: 0.0021 Epoch 30/100 57/57 [==============================] - 1s 24ms/step - loss: 5.7730e-04 - val_loss: 0.0035 Epoch 31/100 57/57 [==============================] - 1s 24ms/step - loss: 4.8358e-04 - val_loss: 0.0011 Epoch 32/100 57/57 [==============================] - 2s 27ms/step - loss: 4.8153e-04 - val_loss: 7.5185e-04 Epoch 33/100 57/57 [==============================] - 2s 27ms/step - loss: 4.8292e-04 - val_loss: 0.0011 Epoch 34/100 57/57 [==============================] - 2s 27ms/step - loss: 4.9850e-04 - val_loss: 8.0108e-04 Epoch 35/100 57/57 [==============================] - 2s 28ms/step - loss: 4.3630e-04 - val_loss: 4.8953e-04 Epoch 36/100 57/57 [==============================] - 2s 27ms/step - loss: 4.5397e-04 - val_loss: 7.6943e-04 Epoch 37/100 57/57 [==============================] - 2s 27ms/step - loss: 4.4764e-04 - val_loss: 0.0011 Epoch 38/100 57/57 [==============================] - 2s 28ms/step - loss: 4.2593e-04 - val_loss: 4.8475e-04 Epoch 39/100 57/57 [==============================] - 2s 28ms/step - loss: 4.4397e-04 - val_loss: 7.5298e-04 Epoch 40/100 57/57 [==============================] - 2s 28ms/step - loss: 3.9450e-04 - val_loss: 0.0016 Epoch 41/100 57/57 [==============================] - 2s 28ms/step - loss: 4.5061e-04 - val_loss: 0.0013 Epoch 42/100 57/57 [==============================] - 2s 27ms/step - loss: 4.4132e-04 - val_loss: 5.6962e-04 Epoch 43/100 57/57 [==============================] - 2s 28ms/step - loss: 4.3400e-04 - val_loss: 8.6920e-04 Epoch 44/100 57/57 [==============================] - 2s 27ms/step - loss: 3.8915e-04 - val_loss: 0.0015 Epoch 45/100 57/57 [==============================] - 2s 27ms/step - loss: 3.9985e-04 - val_loss: 0.0016 Epoch 46/100 57/57 [==============================] - 2s 27ms/step - loss: 3.9131e-04 - val_loss: 4.3715e-04 Epoch 47/100 57/57 [==============================] - 2s 27ms/step - loss: 3.8516e-04 - val_loss: 6.2513e-04 Epoch 48/100 57/57 [==============================] - 2s 28ms/step - loss: 3.8856e-04 - val_loss: 6.3549e-04 Epoch 49/100 57/57 [==============================] - 2s 27ms/step - loss: 4.0803e-04 - val_loss: 6.9909e-04 Epoch 50/100 57/57 [==============================] - 2s 27ms/step - loss: 4.0064e-04 - val_loss: 0.0022 Epoch 51/100 57/57 [==============================] - 2s 27ms/step - loss: 3.6760e-04 - val_loss: 4.0346e-04 Epoch 52/100 57/57 [==============================] - 2s 27ms/step - loss: 3.7670e-04 - val_loss: 8.0508e-04 Epoch 53/100 57/57 [==============================] - 2s 27ms/step - loss: 3.9973e-04 - val_loss: 0.0010 Epoch 54/100 57/57 [==============================] - 2s 28ms/step - loss: 3.3143e-04 - val_loss: 3.8222e-04 Epoch 55/100 57/57 [==============================] - 2s 28ms/step - loss: 3.7577e-04 - val_loss: 4.4995e-04 Epoch 56/100 57/57 [==============================] - 2s 27ms/step - loss: 3.6030e-04 - val_loss: 0.0017 Epoch 57/100 57/57 [==============================] - 2s 28ms/step - loss: 3.3792e-04 - val_loss: 0.0012 Epoch 58/100 57/57 [==============================] - 1s 24ms/step - loss: 3.3102e-04 - val_loss: 0.0010 Epoch 59/100 57/57 [==============================] - 1s 24ms/step - loss: 3.4371e-04 - val_loss: 6.0616e-04 Epoch 60/100 57/57 [==============================] - 1s 24ms/step - loss: 3.9870e-04 - val_loss: 0.0028 Epoch 61/100 57/57 [==============================] - 1s 24ms/step - loss: 3.2062e-04 - val_loss: 8.2139e-04 Epoch 62/100 57/57 [==============================] - 1s 24ms/step - loss: 3.1101e-04 - val_loss: 0.0014 Epoch 63/100 57/57 [==============================] - 1s 24ms/step - loss: 3.7520e-04 - val_loss: 6.1046e-04 Epoch 64/100 57/57 [==============================] - 1s 24ms/step - loss: 3.2823e-04 - val_loss: 6.9230e-04
9/9 [==============================] - 0s 8ms/step MAPE: 1.88% RMSE: 0.02 MAE: 0.01 R² Score: 0.96 SMAPE: 1.87% Explained Variance Score: 0.96
In [6]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import plotly.express as px
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_percentage_error, mean_squared_error
from keras.models import Sequential
from keras.layers import LSTM, Dense, Dropout
from keras.callbacks import EarlyStopping
import warnings
warnings.filterwarnings('ignore')
# === 数据加载与预处理 ===
# 读取新数据
df = pd.read_csv('XAU_1d_data_2004_to_2024-09-20.csv') # 替换为你的文件路径
df.columns = ['Date', 'Time', 'Open', 'High', 'Low', 'Close', 'Volume'] # 确保列名正确
# 合并日期和时间列,并转换为datetime格式
df['Datetime'] = pd.to_datetime(df['Date'] + ' ' + df['Time'])
df.sort_values(by='Datetime', ascending=True, inplace=True)
df.reset_index(drop=True, inplace=True)
# 转换为浮点数
NumCols = ['Open', 'High', 'Low', 'Close', 'Volume']
df[NumCols] = df[NumCols].replace({',': ''}, regex=True).astype('float64')
# 缺失值检测
if df.isnull().sum().sum() > 0:
print("数据中存在缺失值!请检查。")
else:
print("数据无缺失值。")
# 可视化黄金价格走势
fig = px.line(y=df['Close'], x=df['Datetime'], title="Gold Price Historical Data")
fig.update_traces(line_color='blue')
fig.update_layout(plot_bgcolor='rgba(255,255,255,0.8)', xaxis_title="Datetime", yaxis_title="Price")
fig.show()
# === 数据切分 ===
# 设置训练集和测试集划分时间点
test_size = int(0.2 * len(df)) # 测试集为数据的20%
scaler = MinMaxScaler()
scaler.fit(df['Close'].values.reshape(-1, 1))
train_data = scaler.transform(df['Close'][:-test_size].values.reshape(-1, 1))
test_data = scaler.transform(df['Close'][-test_size - 60:].values.reshape(-1, 1))
# 滑动窗口构造
window_size = 60
X_train, y_train, X_test, y_test = [], [], [], []
for i in range(window_size, len(train_data)):
X_train.append(train_data[i - window_size:i, 0])
y_train.append(train_data[i, 0])
for i in range(window_size, len(test_data)):
X_test.append(test_data[i - window_size:i, 0])
y_test.append(test_data[i, 0])
X_train, y_train = np.array(X_train), np.array(y_train)
X_test, y_test = np.array(X_test), np.array(y_test)
# 调整形状
X_train = X_train.reshape((X_train.shape[0], X_train.shape[1], 1))
X_test = X_test.reshape((X_test.shape[0], X_test.shape[1], 1))
# === 模型构建 ===
def build_model():
model = Sequential()
model.add(LSTM(units=64, return_sequences=True, input_shape=(X_train.shape[1], 1)))
model.add(Dropout(0.2))
model.add(LSTM(units=32, return_sequences=False))
model.add(Dropout(0.2))
model.add(Dense(1))
model.compile(optimizer='adam', loss='mean_squared_error')
return model
model = build_model()
# 训练
early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)
history = model.fit(X_train, y_train, epochs=50, batch_size=32, validation_split=0.2, callbacks=[early_stopping], verbose=1)
# === 模型评估 ===
y_pred = model.predict(X_test)
MAPE = mean_absolute_percentage_error(y_test, y_pred)
MSE = mean_squared_error(y_test, y_pred)
RMSE = np.sqrt(MSE)
print(f"MAPE: {MAPE * 100:.2f}%")
print(f"RMSE: {RMSE:.2f}")
# === 可视化结果 ===
y_test_true = scaler.inverse_transform(y_test.reshape(-1, 1))
y_test_pred = scaler.inverse_transform(y_pred)
plt.figure(figsize=(15, 6))
plt.plot(df['Datetime'][:-test_size], scaler.inverse_transform(train_data), color='black', lw=2)
plt.plot(df['Datetime'][-test_size:], y_test_true, color='blue', lw=2)
plt.plot(df['Datetime'][-test_size:], y_test_pred, color='red', lw=2)
plt.title('Gold Price Prediction Model Performance', fontsize=15)
plt.xlabel('Datetime', fontsize=12)
plt.ylabel('Price', fontsize=12)
plt.legend(['Training Data', 'Actual Test Data', 'Predicted Test Data'], loc='upper left', prop={'size': 15})
plt.grid(color='white')
plt.show()
数据无缺失值。
Epoch 1/50 103/103 [==============================] - 5s 28ms/step - loss: 0.0085 - val_loss: 5.7880e-04 Epoch 2/50 103/103 [==============================] - 2s 24ms/step - loss: 0.0016 - val_loss: 4.0505e-04 Epoch 3/50 103/103 [==============================] - 2s 24ms/step - loss: 0.0013 - val_loss: 4.0723e-04 Epoch 4/50 103/103 [==============================] - 2s 24ms/step - loss: 0.0014 - val_loss: 5.3618e-04 Epoch 5/50 103/103 [==============================] - 2s 24ms/step - loss: 0.0012 - val_loss: 2.9975e-04 Epoch 6/50 103/103 [==============================] - 3s 24ms/step - loss: 0.0012 - val_loss: 2.1895e-04 Epoch 7/50 103/103 [==============================] - 3s 24ms/step - loss: 0.0011 - val_loss: 1.8931e-04 Epoch 8/50 103/103 [==============================] - 3s 24ms/step - loss: 9.8878e-04 - val_loss: 1.8997e-04 Epoch 9/50 103/103 [==============================] - 3s 25ms/step - loss: 9.9088e-04 - val_loss: 2.2553e-04 Epoch 10/50 103/103 [==============================] - 3s 25ms/step - loss: 9.3725e-04 - val_loss: 5.0751e-04 Epoch 11/50 103/103 [==============================] - 2s 24ms/step - loss: 8.9388e-04 - val_loss: 2.2041e-04 Epoch 12/50 103/103 [==============================] - 2s 24ms/step - loss: 8.4638e-04 - val_loss: 3.3183e-04 33/33 [==============================] - 1s 8ms/step MAPE: 2.06% RMSE: 0.02
In [8]:
from sklearn.metrics import r2_score, mean_absolute_error
# 模型预测
y_pred = model.predict(X_test)
# 反标准化
y_test_true = scaler.inverse_transform(y_test.reshape(-1, 1))
y_test_pred = scaler.inverse_transform(y_pred)
# 评估指标
MAPE = mean_absolute_percentage_error(y_test, y_pred)
MSE = mean_squared_error(y_test, y_pred)
RMSE = np.sqrt(MSE)
MAE = mean_absolute_error(y_test, y_pred)
R2 = r2_score(y_test, y_pred)
print(f"MAPE: {MAPE * 100:.2f}%")
print(f"MSE: {MSE:.2f}")
print(f"RMSE: {RMSE:.2f}")
print(f"MAE: {MAE:.2f}")
print(f"R^2 Score: {R2:.2f}")
33/33 [==============================] - 0s 8ms/step MAPE: 2.06% MSE: 0.00 RMSE: 0.02 MAE: 0.01 R^2 Score: 0.95