105 lines
3.9 KiB
Python
105 lines
3.9 KiB
Python
import pandas as pd
|
||
|
||
# 读取Excel文件的指定工作表和列
|
||
df1 = pd.read_excel(
|
||
"/home/baol/tools/数据草稿-案款1.xls",
|
||
sheet_name="一案一账户支",
|
||
usecols=["支款案号", "来源案号", "申请人", "支付日期", "支付金额", "领款人"],
|
||
)
|
||
df2 = pd.read_excel(
|
||
"/home/baol/tools/数据草稿-案款1.xls",
|
||
sheet_name="银行支出",
|
||
usecols=["对方户名", "交易时间", "支出金额"],
|
||
header=1,
|
||
)
|
||
|
||
# 标准化日期格式为 yyyy-mm-dd
|
||
df1["支付日期"] = pd.to_datetime(df1["支付日期"]).dt.strftime("%Y-%m-%d")
|
||
df2["交易时间"] = pd.to_datetime(df2["交易时间"]).dt.strftime("%Y-%m-%d")
|
||
|
||
# 确保金额为数值类型
|
||
df1["支付金额"] = pd.to_numeric(df1["支付金额"], errors="coerce")
|
||
df2["支出金额"] = pd.to_numeric(df2["支出金额"], errors="coerce")
|
||
|
||
# 初始化任务标记列
|
||
df1["任务"] = "任务3"
|
||
df2["任务"] = "任务3"
|
||
|
||
# 任务2:查找在某一天有多笔金额相同的记录
|
||
# 标记df1中的重复记录
|
||
df1_duplicates = df1[df1.duplicated(subset=["支付日期", "支付金额"], keep=False)]
|
||
df1.loc[df1_duplicates.index, "任务"] = "任务2"
|
||
|
||
# 标记df2中的重复记录
|
||
df2_duplicates = df2[df2.duplicated(subset=["交易时间", "支出金额"], keep=False)]
|
||
df2.loc[df2_duplicates.index, "任务"] = "任务2"
|
||
|
||
# 为匹配准备唯一记录(任务1)
|
||
df1_unique = (
|
||
df1[df1["任务"] == "任务3"].reset_index().rename(columns={"index": "df1_index"})
|
||
)
|
||
df2_unique = (
|
||
df2[df2["任务"] == "任务3"].reset_index().rename(columns={"index": "df2_index"})
|
||
)
|
||
|
||
# 创建辅助列用于匹配
|
||
df1_unique["日期"] = df1_unique["支付日期"]
|
||
df1_unique["金额"] = df1_unique["支付金额"]
|
||
df1_unique["名称"] = df1_unique["领款人"]
|
||
|
||
df2_unique["日期"] = df2_unique["交易时间"]
|
||
df2_unique["金额"] = df2_unique["支出金额"]
|
||
df2_unique["名称"] = df2_unique["对方户名"]
|
||
|
||
# 条件1:支付金额与支出金额匹配,支付日期与交易时间匹配
|
||
condition1_matches = pd.merge(
|
||
df1_unique, df2_unique, on=["日期", "金额"], how="inner", suffixes=("_df1", "_df2")
|
||
)
|
||
df1.loc[condition1_matches["df1_index"], "任务"] = "任务1"
|
||
df2.loc[condition1_matches["df2_index"], "任务"] = "任务1"
|
||
|
||
# 条件2:支付金额与支出金额匹配,领款人与对方户名匹配
|
||
condition2_matches = pd.merge(
|
||
df1_unique, df2_unique, on=["金额", "名称"], how="inner", suffixes=("_df1", "_df2")
|
||
)
|
||
df1.loc[condition2_matches["df1_index"], "任务"] = "任务1"
|
||
df2.loc[condition2_matches["df2_index"], "任务"] = "任务1"
|
||
|
||
# 合并条件匹配的结果
|
||
task1_matches = pd.concat([condition1_matches, condition2_matches], ignore_index=True)
|
||
task1_matches = task1_matches.drop_duplicates(subset=["df1_index", "df2_index"])
|
||
|
||
# 任务3:提取不符合任务1和任务2的记录
|
||
task3_df1 = df1[df1["任务"] == "任务3"]
|
||
task3_df2 = df2[df2["任务"] == "任务3"]
|
||
|
||
# 创建一个新的Excel文件,保存结果
|
||
with pd.ExcelWriter("/home/baol/tools/匹配结果.xlsx") as writer:
|
||
# 任务1结果
|
||
task1_result = task1_matches[
|
||
[
|
||
"支款案号",
|
||
"来源案号",
|
||
"申请人",
|
||
"支付日期",
|
||
"支付金额",
|
||
"领款人",
|
||
"对方户名",
|
||
"交易时间",
|
||
"支出金额",
|
||
]
|
||
]
|
||
task1_result.to_excel(writer, sheet_name="任务1匹配结果", index=False)
|
||
|
||
# 任务2结果
|
||
df1_duplicates.to_excel(
|
||
writer, sheet_name="任务2_一案一账户支重复记录", index=False
|
||
)
|
||
df2_duplicates.to_excel(writer, sheet_name="任务2_银行支出重复记录", index=False)
|
||
|
||
# 任务3结果
|
||
task3_df1.to_excel(writer, sheet_name="任务3_一案一账户支未匹配记录", index=False)
|
||
task3_df2.to_excel(writer, sheet_name="任务3_支出未匹配记录", index=False)
|
||
|
||
print("结果已保存到 '匹配结果.xlsx' 文件中。")
|