# ==============================
# 文件名:train - MHLS-ALR
# 作者:刘丰芝
# 日期:2026-06-20
# 功能:实现ALR-LightGBM算法并评估,生成Word运行日志
# ==============================
import subprocess
import sys

# Best-effort dependency bootstrap; it must run before the third-party imports
# below. Invoking pip through the running interpreter (`sys.executable -m pip`)
# guarantees the packages land in the environment this script actually uses,
# which a bare `pip` found on PATH does not.
_pip_result = subprocess.run(
    [sys.executable, "-m", "pip", "install",
     "lightgbm", "python-docx", "openpyxl", "shap"],
    capture_output=True,
    text=True,
    errors="replace",
)
if _pip_result.returncode != 0:
    # Do not abort: the packages may already be installed (e.g. an offline
    # kernel). Surface the reason so a later ImportError is easy to diagnose.
    print(
        f"[warning] pip install exited with code {_pip_result.returncode}:\n"
        f"{_pip_result.stderr}",
        file=sys.stderr,
    )
import os
import datetime
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
import lightgbm as lgb
from sklearn.metrics import classification_report, confusion_matrix, recall_score, f1_score, balanced_accuracy_score
from docx import Document
from docx.shared import Pt, Inches
from docx.enum.text import WD_ALIGN_PARAGRAPH
import shap
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt
plt.rcParams['font.sans-serif'] = ['SimHei']
plt.rcParams['axes.unicode_minus'] = False
# ========= 1. Load data =========
df = pd.read_excel("/kaggle/input/cr-v8-lf-data/train-MHLS-ALR.xlsx", engine="openpyxl")
X = df.drop(columns=["index", "三分类"])
y = df["三分类"]

# ========= 2. Declare categorical features =========
categorical_columns = [
    "性别", "婚姻状况", "教育水平",
    "贷款用途", "贷款形式", "地区分类"
]
# Cast on the full feature frame *before* splitting:
#   * train and test then share one category vocabulary per column, instead of
#     each inferring its own from whatever values happen to fall in the split;
#   * no column is assigned on the frames returned by train_test_split, which
#     avoids pandas chained-assignment (SettingWithCopy) warnings.
# Columns missing from the sheet are skipped, as before.
X = X.astype({col: "category" for col in categorical_columns if col in X.columns})

# ========= 3. Train / test split =========
# Stratified 80/20 split with a fixed seed for reproducibility.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, stratify=y, test_size=0.2, random_state=42
)
# ========= 4. Adaptive learning rate =========
def adaptive_learning_rate(iteration, base_lr=0.1, decay_rate=0.01):
    """Return the inverse-time-decayed learning rate for a boosting round.

    Computes ``base_lr / (1 + decay_rate * iteration)``.

    Args:
        iteration: Zero-based boosting iteration index. A single positional
            argument is all that ``lgb.reset_parameter`` passes, so the
            function can be handed to it directly.
        base_lr: Learning rate at iteration 0.
        decay_rate: Decay strength per iteration; 0 keeps the rate constant.

    Returns:
        The learning rate to use at ``iteration``.
    """
    return base_lr / (1 + decay_rate * iteration)
# ========= 5. Train ALR-LightGBM =========
# Early stopping picks the best iteration by monitoring a held-out loss. That
# monitor must not be the test set, otherwise the test metrics reported later
# are optimistically biased (the model was tuned on them). A stratified
# validation split is therefore carved out of the training data only.
X_fit, X_val, y_fit, y_val = train_test_split(
    X_train, y_train, stratify=y_train, test_size=0.2, random_state=42
)

model = lgb.LGBMClassifier(
    objective="multiclass",
    num_class=3,
    boosting_type="gbdt",
    num_leaves=31,
    n_estimators=1000,  # upper bound; early stopping usually ends sooner
    class_weight="balanced",
    random_state=42,
)
model.fit(
    X_fit, y_fit,
    eval_set=[(X_val, y_val)],
    eval_metric="multi_logloss",
    categorical_feature=[c for c in categorical_columns if c in X_fit.columns],
    callbacks=[
        # Re-evaluates the learning rate before every boosting round.
        lgb.reset_parameter(learning_rate=adaptive_learning_rate),
        lgb.log_evaluation(period=100),
        lgb.early_stopping(stopping_rounds=50),
    ],
)
# ========= 6. Evaluate =========
y_pred_prob = model.predict_proba(X_test)
# predict_proba columns are ordered like model.classes_, so the argmax column
# index must be mapped through classes_ to obtain the actual label. (When the
# labels are exactly 0..K-1 this is identical to the raw argmax.)
y_pred_label = model.classes_[np.argmax(y_pred_prob, axis=1)]

print("\n🔎 分类报告:")
print(classification_report(y_test, y_pred_label, digits=4))
print("🔎 混淆矩阵:")
print(confusion_matrix(y_test, y_pred_label))

# ===== Binary view (default vs. non-default) =====
# Class 2 is treated as "default" and encoded as 0; every other class is
# "non-default" and encoded as 1.
y_test_binary = np.where(y_test == 2, 0, 1)
y_pred_binary = np.where(y_pred_label == 2, 0, 1)

# labels=[0, 1] pins the matrix to 2x2 even if one binary class never occurs,
# so the four-way unpacking below cannot fail.
tn, fp, fn, tp = confusion_matrix(
    y_test_binary, y_pred_binary, labels=[0, 1]
).ravel()

# Recall of the positive (non-default) and negative (default) binary classes;
# guarded against an empty class.
non_default_recall = tp / (tp + fn) if (tp + fn) > 0 else 0.0
default_recall = tn / (tn + fp) if (tn + fp) > 0 else 0.0
print("\n✅ 非违约识别率:", f"{non_default_recall:.4f}")
print("✅ 违约识别率:", f"{default_recall:.4f}")
# ========= 7. Generate the Word run log =========
# One timestamp feeds both the "run time" line inside the document and the
# file name, so the two can never disagree.
run_time = datetime.datetime.now()

# Sorted set of labels that occur in the ground truth or the predictions.
# Deriving it from the data keeps the report table, the confusion matrix and
# its row/column captions the same size instead of assuming exactly 0/1/2.
class_labels = np.unique(
    np.concatenate([np.asarray(y_test), np.asarray(y_pred_label)])
)

report_dict = classification_report(y_test, y_pred_label, output_dict=True)

doc = Document()
style = doc.styles['Normal']
style.font.name = 'Times New Roman'
style.font.size = Pt(12)

heading = doc.add_heading('课程作业运行日志', 0)
heading.alignment = WD_ALIGN_PARAGRAPH.CENTER
doc.add_paragraph("作者:刘丰芝")
doc.add_paragraph("学号:[27123106]")
doc.add_paragraph(f"运行时间:{run_time.strftime('%Y-%m-%d %H:%M:%S')}")
doc.add_paragraph("算法:MHLS + ALR-LightGBM")
doc.add_paragraph("-" * 50)

# Classification report table: one header row, then one row per class.
table = doc.add_table(rows=1, cols=5)
table.style = 'Table Grid'
headers = ['类别', 'Precision', 'Recall', 'F1-score', 'Support']
for cell, title in zip(table.rows[0].cells, headers):
    cell.text = title
for label in class_labels:
    key = str(label)  # classification_report keys its dict by str(label)
    if key not in report_dict:
        continue
    metrics = report_dict[key]
    row_cells = table.add_row().cells
    row_cells[0].text = key
    row_cells[1].text = f"{metrics['precision']:.4f}"
    row_cells[2].text = f"{metrics['recall']:.4f}"
    row_cells[3].text = f"{metrics['f1-score']:.4f}"
    row_cells[4].text = str(int(metrics['support']))

# Confusion matrix, rendered as the text form of a labelled DataFrame.
doc.add_heading('混淆矩阵', level=2)
cm = confusion_matrix(y_test, y_pred_label, labels=class_labels)
cm_df = pd.DataFrame(
    cm,
    columns=[f"预测{label}" for label in class_labels],
    index=[f"真实{label}" for label in class_labels],
)
doc.add_paragraph(str(cm_df))

# Headline metrics
doc.add_heading('关键指标', level=2)
doc.add_paragraph(f"Balanced Accuracy: {balanced_accuracy_score(y_test, y_pred_label):.4f}")
doc.add_paragraph(f"Macro F1-score: {f1_score(y_test, y_pred_label, average='macro'):.4f}")

# Save under a timestamped name; create the output directory on first use.
timestamp = run_time.strftime("%Y-%m-%d_%H-%M-%S")
save_path = f"运行日志/{timestamp}_课程作业运行日志.docx"
os.makedirs("运行日志", exist_ok=True)
doc.save(save_path)
print(f"✅ Word 日志已生成: {save_path}")