In [3]:
import pandas as pd
# 读取第一个数据文件
data1 = pd.read_csv("XAU_1d_data_2004_to_2024-09-20.csv") # 请修改为你的实际路径
data1['Date'] = pd.to_datetime(data1['Date'], format='%Y.%m.%d')
# 读取第二个数据文件
data2 = pd.read_csv("Gold Price (2013-2022).csv") # 请修改为你的实际路径
data2['Date'] = pd.to_datetime(data2['Date'], format='%m/%d/%Y')
# 设置日期范围
start_date = pd.to_datetime("2022-12-31")
end_date = pd.to_datetime("2024-09-19")
# 筛选第一个数据文件中日期范围在2022-12-31到2024-09-19的数据
data1_filtered = data1[(data1['Date'] >= start_date) & (data1['Date'] <= end_date)]
# 构建第二个文件的空数据框,以便合并
# 需要的字段包括:Date, Price, Open, High, Low, Vol., Change %
columns = ["Date", "Price", "Open", "High", "Low", "Vol.", "Change %"]
data2_extended = pd.DataFrame(columns=columns)
# 填充数据
for date in data1_filtered['Date']:
row = {
"Date": date,
"Price": 0, # 没有Price字段时填充为0
"Open": data1_filtered[data1_filtered['Date'] == date]['Open'].values[0],
"High": data1_filtered[data1_filtered['Date'] == date]['High'].values[0],
"Low": data1_filtered[data1_filtered['Date'] == date]['Low'].values[0],
"Vol.": 0, # 没有Vol.字段时填充为0
"Change %": 0 # 没有Change %字段时填充为0
}
data2_extended = pd.concat([data2_extended, pd.DataFrame([row])], ignore_index=True)
# 合并第二个数据文件与扩展的部分
data2_combined = pd.concat([data2, data2_extended], ignore_index=True)
# 保存结果
data2_combined.to_csv("combined_data.csv", index=False)
C:\Users\黄清枫\AppData\Local\Temp\ipykernel_10340\2005080117.py:34: FutureWarning: The behavior of DataFrame concatenation with empty or all-NA entries is deprecated. In a future version, this will no longer exclude empty or all-NA columns when determining the result dtypes. To retain the old behavior, exclude the relevant entries before the concat operation. data2_extended = pd.concat([data2_extended, pd.DataFrame([row])], ignore_index=True)
In [17]:
import pandas as pd
# 读取文件
file1 = pd.read_csv('Gold Price (2013-2022).csv') # 文件1:包含"Date", "Price", "Open", "High", "Low", "Vol.", "Change %"
file2 = pd.read_csv('Gold_Data.csv') # 文件2:包含"Date", "Adj_Close", "Close", "High", "Low", "Open", "Volume"
# 日期自动转换为datetime格式
file1['Date'] = pd.to_datetime(file1['Date'], errors='coerce')
file2['Date'] = pd.to_datetime(file2['Date'], errors='coerce')
# 去掉时区信息,确保日期列格式一致
file2['Date'] = file2['Date'].dt.tz_localize(None)
# 检查日期范围
file1_date_range = file1['Date'].min(), file1['Date'].max()
file2_date_range = file2['Date'].min(), file2['Date'].max()
print(f"File1 Date Range: {file1_date_range}")
print(f"File2 Date Range: {file2_date_range}")
# 将第二个文件的 'Adj_Close' 列映射为 'Price'
file2 = file2.rename(columns={'Adj_Close': 'Price', 'Volume': 'Vol.'})
# 只保留相关列并确保字段一致
file2 = file2[['Date', 'Price', 'Open', 'High', 'Low', 'Vol.']]
# 合并数据
merged_data = pd.merge(file1, file2, on='Date', how='right', suffixes=('_file1', '_file2'))
# 输出合并后的列
print("Merged Data Columns: ", merged_data.columns)
# 使用来自file2的Price列('Price_file2')替换Price列
merged_data['Price'] = merged_data['Price_file2']
merged_data.drop(columns=['Price_file2'], inplace=True) # 删除额外的列
# 填补缺失的列,保留最终文件1的字段格式
merged_data['Open'] = merged_data['Open_file2'].fillna(merged_data['Open_file1'])
merged_data['High'] = merged_data['High_file2'].fillna(merged_data['High_file1'])
merged_data['Low'] = merged_data['Low_file2'].fillna(merged_data['Low_file1'])
merged_data['Vol.'] = merged_data['Vol._file2'].fillna(merged_data['Vol._file1'])
merged_data['Change %'] = 0 # 对于file2中没有的字段,填充为0
# 删除文件1中的额外列
merged_data.drop(columns=['Open_file1', 'High_file1', 'Low_file1', 'Vol._file1'], inplace=True)
# 只保留指定的列
final_columns = ["Date", "Price", "Open", "High", "Low", "Vol.", "Change %"]
merged_data = merged_data[final_columns]
# 输出合并后的结果
print(merged_data.head())
# 保存合并后的数据
merged_data.to_csv('merged_data.csv', index=False)
File1 Date Range: (Timestamp('2013-01-02 00:00:00'), Timestamp('2022-12-30 00:00:00'))
File2 Date Range: (Timestamp('2000-08-30 00:00:00'), Timestamp('2024-10-30 00:00:00'))
Merged Data Columns: Index(['Date', 'Price_file1', 'Open_file1', 'High_file1', 'Low_file1',
'Vol._file1', 'Change %', 'Price_file2', 'Open_file2', 'High_file2',
'Low_file2', 'Vol._file2'],
dtype='object')
Date Price Open High Low Vol. Change %
0 2000-08-30 273.899994 273.899994 273.899994 273.899994 0 0
1 2000-08-31 278.299988 274.799988 278.299988 274.799988 0 0
2 2000-09-01 277.000000 277.000000 277.000000 277.000000 0 0
3 2000-09-05 275.799988 275.799988 275.799988 275.799988 2 0
4 2000-09-06 274.200012 274.200012 274.200012 274.200012 0 0
In [ ]:
In [ ]: