river-sim/case_int.py
2024-12-25 16:38:07 +08:00

100 lines
3.9 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import pandas as pd
# 1. 读取 Excel 文件中的数据
df_case = pd.read_excel(
"/home/baol/tools/数据草稿-案款1.xls", sheet_name="一案一账户收"
)
df_bank = pd.read_excel(
"/home/baol/tools/数据草稿-案款1.xls", sheet_name="银行-收入", header=1
)
# 2. 保留指定的列
# 在“一案一账户收”中,保留归属本案金额、到账金额、到账日期、缴款人、案号
df_case = df_case[["归属本案金额", "到账金额", "到账日期", "缴款人", "案号"]]
# 在“银行-收入”中,保留对方户名、交易时间、金额
df_bank = df_bank[["对方户名", "交易时间", "金额"]]
# 3. 统一日期格式为 yyyy-mm-dd
df_case["到账日期"] = pd.to_datetime(df_case["到账日期"]).dt.strftime("%Y-%m-%d")
df_bank["交易时间"] = pd.to_datetime(df_bank["交易时间"]).dt.strftime("%Y-%m-%d")
# 4. 任务1检索到账金额与金额匹配且到账日期与交易时间匹配的记录但排除在同一天有多笔金额相同的情况
# 合并两个数据框,匹配到账金额=金额,到账日期=交易时间
df_merged = pd.merge(
df_case,
df_bank,
left_on=["到账金额", "到账日期"],
right_on=["金额", "交易时间"],
how="inner",
)
# 计算每个日期和金额组合的出现次数
case_counts = (
df_case.groupby(["到账日期", "到账金额"]).size().reset_index(name="case_count")
)
bank_counts = (
df_bank.groupby(["交易时间", "金额"]).size().reset_index(name="bank_count")
)
# 合并统计数据
df_merged = pd.merge(
df_merged,
case_counts,
left_on=["到账日期", "到账金额"],
right_on=["到账日期", "到账金额"],
)
df_merged = pd.merge(
df_merged, bank_counts, left_on=["交易时间", "金额"], right_on=["交易时间", "金额"]
)
# 过滤出在同一天没有多笔金额相同的记录即计数为1
task1_result = df_merged[
(df_merged["case_count"] == 1) & (df_merged["bank_count"] == 1)
]
# 获取任务1中“一案一账户收”和“银行-收入”的索引
task1_case_indices = df_case[
df_case[["到账金额", "到账日期"]]
.apply(tuple, axis=1)
.isin(task1_result[["到账金额", "到账日期"]].apply(tuple, axis=1))
].index
task1_bank_indices = df_bank[
df_bank[["金额", "交易时间"]]
.apply(tuple, axis=1)
.isin(task1_result[["金额", "交易时间"]].apply(tuple, axis=1))
].index
# 5. 任务2检索出在某一天有多笔金额相同的记录
# 找出“一案一账户收”中同一天金额相同且次数大于1的记录
task2_case = df_case.groupby(["到账日期", "到账金额"]).filter(lambda x: len(x) > 1)
task2_case_indices = task2_case.index
# 找出“银行-收入”中同一天金额相同且次数大于1的记录
task2_bank = df_bank.groupby(["交易时间", "金额"]).filter(lambda x: len(x) > 1)
task2_bank_indices = task2_bank.index
# 6. 任务3检索出不符合任务1和任务2的记录
# 在“一案一账户收”中获取不在任务1和任务2中的索引
task3_case_indices = df_case.index.difference(
task1_case_indices.union(task2_case_indices)
)
task3_case_result = df_case.loc[task3_case_indices]
# 在“银行-收入”中获取不在任务1和任务2中的索引
task3_bank_indices = df_bank.index.difference(
task1_bank_indices.union(task2_bank_indices)
)
task3_bank_result = df_bank.loc[task3_bank_indices]
# 7. 将结果保存到 Excel 文件
with pd.ExcelWriter("/home/baol/tools/任务结果.xlsx") as writer:
task1_result.to_excel(writer, sheet_name="任务1", index=False)
task2_case.to_excel(writer, sheet_name="任务2_一案一账户收", index=False)
task2_bank.to_excel(writer, sheet_name="任务2_银行收入", index=False)
task3_case_result.to_excel(writer, sheet_name="任务3_一案一账户收", index=False)
task3_bank_result.to_excel(writer, sheet_name="任务3_银行收入", index=False)
print("所有任务结果已保存到 '任务结果.xlsx' 文件中。")