# In [1]:
# ==================================================
# 论文复现:机器学习解码投资者预期协同(最终修复版)
# 彻底修复矩阵维度不匹配问题,VS Code直接运行
# ==================================================
import os
import random
import logging
from datetime import datetime
import pandas as pd
import numpy as np
# ===================== 全局参数 =====================
STOCK_NUM = 30  # stocks simulated for every (year, month)
FEATURE_DIM = 105  # number of feat_* columns per sample; also the network input width
INVESTOR_NUM = 100  # independently initialised networks trained per rolling window
HIDDEN_SIZE = 100  # hidden-layer width of each InvestorNet
WINDOW_SIZE = 12  # number of consecutive months used as training data for one window
TRAIN_EPOCH = 30  # full-batch gradient steps performed by InvestorNet.train
LEARNING_RATE = 0.01  # step size applied in InvestorNet.backward
LOG_DIR = "run_log"  # directory that receives the timestamped log file
RESULT_FILE = "ied_result.csv"  # CSV path written by save_and_summary
# Seed NumPy's global RNG and the stdlib RNG so repeated runs draw the same numbers.
np.random.seed(2025)
random.seed(2025)
# ===================== 日志初始化 =====================
def init_logger():
    """Configure root logging to a timestamped file plus the console.

    Creates ``LOG_DIR`` when it does not exist, attaches a UTF-8 file handler
    (``run_<YYYYmmdd_HHMMSS>.log``) and a stream handler to the root logger,
    and writes a short banner describing the run parameters.

    Returns:
        The configured root logger.
    """
    os.makedirs(LOG_DIR, exist_ok=True)
    log_path = os.path.join(LOG_DIR, f"run_{datetime.now().strftime('%Y%m%d_%H%M%S')}.log")
    logging.basicConfig(
        level=logging.INFO,
        format="%(asctime)s | %(message)s",
        datefmt="%Y-%m-%d %H:%M:%S",
        handlers=[
            logging.FileHandler(log_path, encoding="utf-8"),
            logging.StreamHandler(),
        ],
        # basicConfig() is a silent no-op when the root logger already has
        # handlers (e.g. the cell is re-run, or the host pre-configured
        # logging). force=True removes and closes the old handlers so this
        # run really gets its own log file and INFO level.
        force=True,
    )
    root_logger = logging.getLogger()
    banner = "=" * 60
    root_logger.info(banner)
    root_logger.info("投资者预期协同模型 开始运行")
    root_logger.info(f"参数:{INVESTOR_NUM}个投资者 | {FEATURE_DIM}维特征 | {WINDOW_SIZE}月滚动窗口")
    root_logger.info(banner)
    return root_logger
# Module-level logger used by the functions below; configured once when this line runs.
logger = init_logger()
# ===================== 基础函数 =====================
def relu(x):
    """Rectified linear unit: element-wise ``max(0, x)``."""
    lower_bound = 0
    # Argument order is kept as (0, x) so signed-zero/NaN handling is unchanged.
    activated = np.maximum(lower_bound, x)
    return activated
def mse_loss(y_pred, y_true):
    """Return the mean squared error between predictions and targets.

    Inputs that hold the same number of elements but differ in layout (for
    example an ``(n, 1)`` prediction column and an ``(n,)`` target vector)
    are flattened and compared element by element. Without this, NumPy
    broadcasting would expand the difference to an ``(n, n)`` matrix and the
    result would average every prediction against every target.

    Inputs whose sizes differ (e.g. a scalar target) still use ordinary
    NumPy broadcasting.

    Args:
        y_pred: Predicted values (array-like).
        y_true: Target values (array-like).

    Returns:
        The mean of the squared differences as a NumPy scalar.
    """
    y_pred = np.asarray(y_pred)
    y_true = np.asarray(y_true)
    if y_pred.shape != y_true.shape and y_pred.size == y_true.size:
        y_pred = y_pred.reshape(-1)
        y_true = y_true.reshape(-1)
    return np.mean((y_pred - y_true) ** 2)
# ===================== 神经网络(维度完全对齐版) =====================
class InvestorNet:
    """One-hidden-layer ReLU regression network trained by full-batch gradient descent.

    Architecture: ``x -> relu(x @ W1 + b1) -> hidden @ W2 + b2``, producing
    one scalar prediction per input row. Weights are drawn from NumPy's
    global RNG, so separately constructed instances start from different
    weights.
    """

    def __init__(self, input_dim, hidden_dim):
        """Initialise small random weights (standard normal * 0.01) and zero biases.

        Args:
            input_dim: Number of input features per sample.
            hidden_dim: Number of hidden units.
        """
        self.W1 = np.random.randn(input_dim, hidden_dim) * 0.01
        self.b1 = np.zeros((1, hidden_dim))
        self.W2 = np.random.randn(hidden_dim, 1) * 0.01
        self.b2 = np.zeros((1, 1))
        # Hidden activations of the most recent forward() call; read by backward().
        self.hidden = None

    def forward(self, x):
        """Compute predictions for ``x`` and cache the hidden activations.

        Args:
            x: 2-D array with one sample per row and ``input_dim`` columns.

        Returns:
            Array of shape ``(len(x), 1)``.
        """
        # Everything stays 2-D so the matrix products line up.
        self.hidden = relu(np.dot(x, self.W1) + self.b1)
        output = np.dot(self.hidden, self.W2) + self.b2
        return output

    def backward(self, x, y_pred, y_true):
        """Apply one gradient-descent update to all parameters in place.

        Must follow a ``forward(x)`` call on the same ``x``, because it reuses
        the hidden activations cached by that call.

        Args:
            x: The inputs that produced ``y_pred``.
            y_pred: Output of ``forward(x)``, shape ``(n, 1)``.
            y_true: Targets with ``n`` elements; any layout, reshaped to ``(n, 1)``.
        """
        n = len(x)
        # Force a column vector so the subtraction is element-wise, not broadcast.
        y_true = np.asarray(y_true).reshape(-1, 1)
        # (pred - true) / n is the gradient of half the MSE w.r.t. the output.
        d_out = (y_pred - y_true) / n

        # Output-layer gradients.
        d_W2 = np.dot(self.hidden.T, d_out)
        d_b2 = np.sum(d_out, axis=0, keepdims=True)

        # Hidden-layer gradients; ReLU passes no gradient where it output 0.
        d_hidden = np.dot(d_out, self.W2.T)
        d_hidden[self.hidden <= 0] = 0
        d_W1 = np.dot(x.T, d_hidden)
        d_b1 = np.sum(d_hidden, axis=0, keepdims=True)

        # Parameter update.
        self.W1 -= LEARNING_RATE * d_W1
        self.b1 -= LEARNING_RATE * d_b1
        self.W2 -= LEARNING_RATE * d_W2
        self.b2 -= LEARNING_RATE * d_b2

    def train(self, x_train, y_train):
        """Run ``TRAIN_EPOCH`` full-batch updates on the given data.

        Args:
            x_train: 2-D training inputs.
            y_train: Training targets with one element per row of ``x_train``.

        Returns:
            The MSE measured in the last epoch, before that epoch's update,
            or ``None`` when ``TRAIN_EPOCH`` is 0.
        """
        # Reshape once to (n, 1): comparing the (n, 1) predictions against an
        # (n,) target would broadcast to (n, n) and report a wrong loss.
        y_col = np.asarray(y_train).reshape(-1, 1)
        loss = None
        for _ in range(TRAIN_EPOCH):
            pred = self.forward(x_train)
            loss = mse_loss(pred, y_col)
            self.backward(x_train, pred, y_col)
        return loss
# ===================== 生成模拟数据 =====================
def generate_simulate_data():
    """Build a simulated monthly stock panel for 2020-2022.

    For every (year, month) and each of ``STOCK_NUM`` stocks, one row is
    created with a return drawn from N(0, 0.05) and ``FEATURE_DIM`` standard
    normal features named ``feat_1`` .. ``feat_<FEATURE_DIM>``.

    Returns:
        DataFrame sorted by year, month and stock_id with a fresh 0..n-1 index.
    """
    logger.info("正在生成模拟股票面板数据...")
    periods = [(yr, mo) for yr in range(2020, 2023) for mo in range(1, 13)]
    feature_names = [f"feat_{k}" for k in range(1, FEATURE_DIM + 1)]
    records = []
    for yr, mo in periods:
        for idx in range(STOCK_NUM):
            record = {
                "year": yr,
                "month": mo,
                "stock_id": f"stock_{idx:03d}",
                "ret": np.random.normal(0, 0.05),
            }
            # The return is drawn before the features so the seeded random
            # stream is consumed in the same order on every run.
            record.update((name, np.random.randn()) for name in feature_names)
            records.append(record)
    panel = pd.DataFrame(records)
    panel = panel.sort_values(["year", "month", "stock_id"]).reset_index(drop=True)
    logger.info(f"模拟数据生成完成,共 {len(panel)} 条样本,{STOCK_NUM}只股票")
    return panel
# ===================== 数据预处理 =====================
def preprocess_data(df):
    """Fill missing features, z-score each feature column and split into arrays.

    The input DataFrame is left unmodified: all transformations are applied
    to a derived copy of the feature columns.

    NOTE(review): the mean and standard deviation are computed over every row
    of ``df``, so months that are later used as test data contribute to the
    scaling statistics.

    Args:
        df: Panel containing ``feat_1`` .. ``feat_<FEATURE_DIM>``, ``ret``,
            ``year``, ``month`` and ``stock_id`` columns.

    Returns:
        Tuple ``(X, y, info)``: ``X`` is the float32 feature matrix, ``y`` the
        float32 ``ret`` vector, and ``info`` a copy of the year / month /
        stock_id columns aligned row by row with ``X`` and ``y``.
    """
    feature_cols = [f"feat_{i}" for i in range(1, FEATURE_DIM + 1)]
    # fillna() returns a new frame, so the caller's DataFrame is not touched.
    features = df[feature_cols].fillna(0)
    for col in feature_cols:
        mean_val = features[col].mean()
        std_val = features[col].std()
        # (Near-)constant columns are left as they are to avoid dividing by ~0.
        if std_val > 1e-6:
            features[col] = (features[col] - mean_val) / std_val
    X = features.values.astype(np.float32)
    y = df["ret"].values.astype(np.float32)
    info = df[["year", "month", "stock_id"]].copy()
    logger.info("数据预处理完成")
    return X, y, info
# ===================== 滚动窗口 + IED计算 =====================
def rolling_train_calc_ied(X, y, info_df):
    """Train per-window investor networks and compute the IED dispersion measure.

    For each rolling window, ``WINDOW_SIZE`` consecutive months form the
    training set and the following month is the test set. ``INVESTOR_NUM``
    freshly initialised ``InvestorNet`` models are trained on the window, and
    each one predicts the test month. IED is the sample standard deviation
    (``ddof=1``) of those predictions for each test row.

    Args:
        X: Feature matrix, one row per sample.
        y: Target vector aligned with ``X``.
        info_df: DataFrame aligned with ``X`` holding at least ``year`` and
            ``month`` columns (assumed to contain no missing values).

    Returns:
        Copy of the ``info_df`` rows that received predictions, with added
        columns ``mean_pred``, ``IED`` and ``true_ret``.
    """
    logger.info("开始滚动窗口训练与IED计算...")

    # Number every row by its chronological (year, month) rank once, instead
    # of rebuilding row-wise DataFrame.apply masks for every window.
    period_id = info_df.groupby(["year", "month"], sort=True).ngroup().to_numpy()
    n_periods = int(period_id.max()) + 1 if period_id.size else 0
    # Clamp at 0 so too-short histories report "0 windows" rather than a negative count.
    total_window = max(n_periods - WINDOW_SIZE, 0)
    logger.info(f"总滚动窗口数:{total_window} 个")

    n_samples = len(X)
    pred_matrix = np.zeros((n_samples, INVESTOR_NUM))
    has_pred = np.zeros(n_samples, dtype=bool)

    for win_idx in range(total_window):
        test_period = win_idx + WINDOW_SIZE
        train_mask = (period_id >= win_idx) & (period_id < test_period)
        test_mask = period_id == test_period
        X_train, y_train = X[train_mask], y[train_mask]
        X_test = X[test_mask]
        test_indices = np.flatnonzero(test_mask)

        for inv_id in range(INVESTOR_NUM):
            # A new model per investor: only the random initial weights differ.
            model = InvestorNet(FEATURE_DIM, HIDDEN_SIZE)
            model.train(X_train, y_train)
            test_pred = model.forward(X_test)
            pred_matrix[test_indices, inv_id] = test_pred.flatten()
        has_pred[test_indices] = True

        if win_idx % 3 == 0:
            # win_idx is zero-based, so win_idx + 1 windows are finished here.
            logger.info(f"进度:已完成 {win_idx + 1}/{total_window} 个窗口")

    logger.info("计算预期协同指标 IED...")
    # Rows of the first WINDOW_SIZE months are never test rows and are dropped.
    result = info_df[has_pred].copy()
    window_preds = pred_matrix[has_pred]
    result["mean_pred"] = np.mean(window_preds, axis=1)
    result["IED"] = np.std(window_preds, axis=1, ddof=1)
    result["true_ret"] = y[has_pred]
    return result
# ===================== 结果保存与统计 =====================
def save_and_summary(result_df):
    """Write the results to ``RESULT_FILE`` and log summary statistics of IED.

    Args:
        result_df: DataFrame containing an ``IED`` column.
    """
    result_df.to_csv(RESULT_FILE, index=False, encoding="utf-8-sig")
    logger.info(f"结果已保存至:{RESULT_FILE}")
    logger.info("\n========== 结果统计 ==========")
    # (label, Series method) pairs, evaluated and logged one at a time.
    summary_stats = (
        ("均值", "mean"),
        ("标准差", "std"),
        ("最小值", "min"),
        ("最大值", "max"),
    )
    for label, stat_name in summary_stats:
        value = getattr(result_df["IED"], stat_name)()
        logger.info(f"IED {label}:{value:.4f}")
    logger.info("IED越小 = 预期越一致 = 协同程度越高")
# ===================== 主程序 =====================
if __name__ == "__main__":
    # Pipeline: simulate -> preprocess -> rolling training / IED -> save and report.
    try:
        raw_data = generate_simulate_data()
        X_data, y_data, info_data = preprocess_data(raw_data)
        result = rolling_train_calc_ied(X_data, y_data, info_data)
        save_and_summary(result)
        logger.info("\n程序运行完成!")
    except Exception as e:
        # Logged at ERROR level with the traceback attached; the error is not re-raised.
        logger.exception(f"运行出错:{str(e)}")
# ---- Captured output of the cell above ----
# 2026-06-27 05:24:49 | ============================================================
# 2026-06-27 05:24:49 | 投资者预期协同模型 开始运行
# 2026-06-27 05:24:49 | 参数:100个投资者 | 105维特征 | 12月滚动窗口
# 2026-06-27 05:24:49 | ============================================================
# 2026-06-27 05:24:49 | 正在生成模拟股票面板数据...
# 2026-06-27 05:24:49 | 模拟数据生成完成,共 1080 条样本,30只股票
# 2026-06-27 05:24:49 | 数据预处理完成
# 2026-06-27 05:24:49 | 开始滚动窗口训练与IED计算...
# 2026-06-27 05:24:49 | 总滚动窗口数:24 个
# 2026-06-27 05:24:55 | 进度:已完成 0/24 个窗口
# 2026-06-27 05:25:10 | 进度:已完成 3/24 个窗口
# 2026-06-27 05:25:26 | 进度:已完成 6/24 个窗口
# 2026-06-27 05:25:41 | 进度:已完成 9/24 个窗口
# 2026-06-27 05:25:57 | 进度:已完成 12/24 个窗口
# 2026-06-27 05:26:13 | 进度:已完成 15/24 个窗口
# 2026-06-27 05:26:29 | 进度:已完成 18/24 个窗口
# 2026-06-27 05:26:45 | 进度:已完成 21/24 个窗口
# 2026-06-27 05:26:56 | 计算预期协同指标 IED...
# 2026-06-27 05:26:56 | 结果已保存至:ied_result.csv
# 2026-06-27 05:26:56 | ========== 结果统计 ==========
# 2026-06-27 05:26:56 | IED 均值:0.0066
# 2026-06-27 05:26:56 | IED 标准差:0.0007
# 2026-06-27 05:26:56 | IED 最小值:0.0046
# 2026-06-27 05:26:56 | IED 最大值:0.0091
# 2026-06-27 05:26:56 | IED越小 = 预期越一致 = 协同程度越高
# 2026-06-27 05:26:56 | 程序运行完成!