# File-viewer header captured with this source: Python, 46 lines, 1.9 KiB.
import pandas as pd

# Reconcile the payment ledger sheet ("一案一账户支") against the bank debit
# sheet ("银行支出") of one workbook. Records are paired on (amount, calendar
# day); the full outer join and the subset of unpaired records are written to
# two separate .xlsx files.

# Workbook that contains both sheets -- replace with your own Excel file path.
FILE_PATH = "/home/baol/tools/数据草稿-案款1.xls"

# Payment ledger sheet: sheet name, columns to keep, and its matching keys
# (payment amount, payment date).
PAYMENT_SHEET = "一案一账户支"
PAYMENT_COLUMNS = ["支款案号", "来源案号", "申请人", "支付日期", "支付金额", "领款人"]
PAYMENT_KEYS = ["支付金额", "支付日期"]

# Bank debit sheet: sheet name, columns to keep, and its matching keys
# (debit amount, transaction time).
BANK_SHEET = "银行支出"
BANK_COLUMNS = ["对方户名", "交易时间", "支出金额"]
BANK_KEYS = ["支出金额", "交易时间"]

# Destination workbooks: every record after the join / unmatched records only.
OUTPUT_FILE_PATH_MERGED = "/home/baol/tools/merged_out_records.xlsx"
OUTPUT_FILE_PATH_UNMATCHED = "/home/baol/tools/unmatched_out_records.xlsx"

_DATE_FORMAT = "%Y-%m-%d"

# Temporary join columns; they are removed again before results are returned.
_PAYMENT_SEQ = "_payment_seq"
_BANK_SEQ = "_bank_seq"


def _with_match_keys(frame: pd.DataFrame, keys, seq_col: str) -> pd.DataFrame:
    """Return a copy of *frame* that is ready to be joined on *keys*.

    *keys* is ``[amount_column, date_column]``. The date column is rewritten
    as ``YYYY-MM-DD`` text so both sheets compare at day granularity, and
    *seq_col* receives the running occurrence number (0, 1, 2, ...) of each
    (amount, date) pair within the frame. The input frame is not modified.
    """
    date_col = keys[1]
    prepared = frame.copy()
    prepared[date_col] = pd.to_datetime(prepared[date_col]).dt.strftime(_DATE_FORMAT)
    # dropna=False: rows with a missing amount/date must still be numbered.
    prepared[seq_col] = prepared.groupby(list(keys), dropna=False).cumcount()
    return prepared


def reconcile(
    payments: pd.DataFrame, bank_debits: pd.DataFrame
) -> "tuple[pd.DataFrame, pd.DataFrame]":
    """Pair payment records with bank debits on (amount, calendar day).

    Args:
        payments: frame containing at least the ``PAYMENT_KEYS`` columns.
        bank_debits: frame containing at least the ``BANK_KEYS`` columns.

    Returns:
        ``(merged, unmatched)``. ``merged`` is the full outer join and keeps
        the columns of both inputs under their own names. ``unmatched`` is
        the subset of ``merged`` rows that have no counterpart on the other
        side or whose amount/date is missing.

    Pairing is one-to-one: the n-th payment with a given (amount, day) can
    only pair with the n-th bank debit having the same (amount, day). A plain
    join on (amount, day) would let a single debit "match" every same-day
    payment of that amount, multiplying rows and hiding surplus records.
    Which of several identical records ends up unpaired follows sheet order.
    """
    left = _with_match_keys(payments, PAYMENT_KEYS, _PAYMENT_SEQ)
    right = _with_match_keys(bank_debits, BANK_KEYS, _BANK_SEQ)

    merged = pd.merge(
        left,
        right,
        left_on=PAYMENT_KEYS + [_PAYMENT_SEQ],
        right_on=BANK_KEYS + [_BANK_SEQ],
        how="outer",  # keep records that have no counterpart
        suffixes=("_一案一账户支", "_银行支出"),  # disambiguate any shared column names
    ).drop(columns=[_PAYMENT_SEQ, _BANK_SEQ])

    # The key columns are named differently on each side, so an unpaired row
    # has NaN in the other side's keys. Checking the dates as well as the
    # amounts also flags rows whose missing keys were joined NaN-to-NaN.
    key_missing = merged[PAYMENT_KEYS + BANK_KEYS].isna().any(axis=1)
    return merged, merged[key_missing]


def main() -> None:
    """Read both sheets, reconcile them, and write the two result workbooks."""
    payments = pd.read_excel(FILE_PATH, sheet_name=PAYMENT_SHEET)[PAYMENT_COLUMNS]
    # header=1: the column names of the bank sheet are on its second row.
    bank_debits = pd.read_excel(FILE_PATH, sheet_name=BANK_SHEET, header=1)[
        BANK_COLUMNS
    ]

    merged, unmatched = reconcile(payments, bank_debits)

    merged.to_excel(OUTPUT_FILE_PATH_MERGED, index=False, engine="openpyxl")
    unmatched.to_excel(OUTPUT_FILE_PATH_UNMATCHED, index=False, engine="openpyxl")

    print(f"合并后的记录已保存到 {OUTPUT_FILE_PATH_MERGED}")
    print(f"未匹配的记录已保存到 {OUTPUT_FILE_PATH_UNMATCHED}")


if __name__ == "__main__":
    main()