In [1]:
#!/usr/bin/env python
# coding: utf-8
In [2]:
import numpy as np
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import (
accuracy_score,
precision_score,
recall_score,
f1_score
)
# Opening banner: a 50-character rule above and below the experiment title.
print(
    "=" * 50,
    "联邦学习金融反欺诈实验开始",
    "=" * 50,
    sep="\n",
)
# 1. Build the simulated dataset
# Seed NumPy's legacy global RNG so every run draws the same samples.
np.random.seed(42)
n = 5000

# Each call advances the shared RNG state, so the features are drawn in this
# fixed order. Upper bounds are exclusive.
age = np.random.randint(low=18, high=60, size=n)
income = np.random.randint(low=3000, high=30000, size=n)
loan_amount = np.random.randint(low=1000, high=50000, size=n)
credit_score = np.random.randint(low=300, high=850, size=n)
late_payment = np.random.randint(low=0, high=10, size=n)

# A sample is labelled fraud (1) only when the credit score is below 500 AND
# there are more than 3 late payments; every other sample is labelled 0.
low_credit = credit_score < 500
many_late_payments = late_payment > 3
fraud = (low_credit & many_late_payments).astype(int)

# Assemble the feature columns and the label into a single table.
data = pd.DataFrame(
    dict(
        age=age,
        income=income,
        loan_amount=loan_amount,
        credit_score=credit_score,
        late_payment=late_payment,
        fraud=fraud,
    )
)
print("数据集生成成功")
print("样本数量:", len(data))
# 2. Split into training and test sets
# Every column except the "fraud" label is used as a model input.
feature_columns = ["age", "income", "loan_amount", "credit_score", "late_payment"]
X = data[feature_columns]
y = data["fraud"]

# Hold out 20% of the rows for testing; the fixed random_state keeps the
# split reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)
# 3. Simulate two institutions
# Institution A receives the first half of the training rows and institution B
# the remainder (B gets the extra row when the row count is odd).
mid = len(X_train) // 2
X_A, X_B = X_train.iloc[:mid], X_train.iloc[mid:]
y_A, y_B = y_train.iloc[:mid], y_train.iloc[mid:]

print("机构A训练样本:", len(X_A))
print("机构B训练样本:", len(X_B))
# 4. Local training
# Each institution fits its own logistic regression on its private partition.
# fit() returns the estimator itself, so construction and training are chained.
model_A = LogisticRegression(max_iter=1000).fit(X_A, y_A)
model_B = LogisticRegression(max_iter=1000).fit(X_B, y_B)
print("本地模型训练完成")
# 5. Federated parameter aggregation (FedAvg)
# Both local models must agree on the label set; otherwise their coefficient
# vectors describe different decision problems and cannot be averaged.
if not np.array_equal(model_A.classes_, model_B.classes_):
    raise ValueError("model_A and model_B were trained on different label sets")

# Weight each institution by its number of training samples. With equally
# sized partitions this reduces to the plain mean of the two parameter sets.
sample_counts = [len(X_A), len(X_B)]
coef_avg = np.average(
    np.stack([model_A.coef_, model_B.coef_]),
    axis=0,
    weights=sample_counts,
)
intercept_avg = np.average(
    np.stack([model_A.intercept_, model_B.intercept_]),
    axis=0,
    weights=sample_counts,
)

# Assemble the global model from parameters only. The aggregator never sees
# the institutions' raw data, so instead of calling fit() on the pooled
# training set (whose result would be overwritten anyway), copy the fitted
# metadata that predict() relies on from a local model. hasattr() guards
# attributes that older scikit-learn versions do not set.
federated_model = LogisticRegression(max_iter=1000)
for fitted_attr in ("classes_", "n_features_in_", "feature_names_in_"):
    if hasattr(model_A, fitted_attr):
        setattr(federated_model, fitted_attr, getattr(model_A, fitted_attr))
federated_model.coef_ = coef_avg
federated_model.intercept_ = intercept_avg
print("联邦聚合完成")
# 6. Evaluation
# Score the aggregated model on the held-out test set.
pred = federated_model.predict(X_test)

acc = accuracy_score(y_test, pred)
pre = precision_score(y_test, pred)
rec = recall_score(y_test, pred)
f1 = f1_score(y_test, pred)

# Report each metric rounded to four decimal places.
print("\n实验结果")
print("-" * 30)
for label, value in (
    ("Accuracy =", acc),
    ("Precision =", pre),
    ("Recall =", rec),
    ("F1-score =", f1),
):
    print(label, round(value, 4))
print("\n实验结束")
================================================== 联邦学习金融反欺诈实验开始 ================================================== 数据集生成成功 样本数量: 5000 机构A训练样本: 2000 机构B训练样本: 2000 本地模型训练完成 联邦聚合完成 实验结果 ------------------------------ Accuracy = 0.928 Precision = 0.823 Recall = 0.8309 F1-score = 0.8269 实验结束
In [ ]: