100 lines
3.9 KiB
Python
100 lines
3.9 KiB
Python
import pandas as pd
|
||
|
||
# 1. 读取 Excel 文件中的数据
|
||
df_case = pd.read_excel(
|
||
"/home/baol/tools/数据草稿-案款1.xls", sheet_name="一案一账户收"
|
||
)
|
||
df_bank = pd.read_excel(
|
||
"/home/baol/tools/数据草稿-案款1.xls", sheet_name="银行-收入", header=1
|
||
)
|
||
|
||
# 2. 保留指定的列
|
||
# 在“一案一账户收”中,保留归属本案金额、到账金额、到账日期、缴款人、案号
|
||
df_case = df_case[["归属本案金额", "到账金额", "到账日期", "缴款人", "案号"]]
|
||
# 在“银行-收入”中,保留对方户名、交易时间、金额
|
||
df_bank = df_bank[["对方户名", "交易时间", "金额"]]
|
||
|
||
# 3. 统一日期格式为 yyyy-mm-dd
|
||
df_case["到账日期"] = pd.to_datetime(df_case["到账日期"]).dt.strftime("%Y-%m-%d")
|
||
df_bank["交易时间"] = pd.to_datetime(df_bank["交易时间"]).dt.strftime("%Y-%m-%d")
|
||
|
||
# 4. 任务1:检索到账金额与金额匹配,且到账日期与交易时间匹配的记录,但排除在同一天有多笔金额相同的情况
|
||
# 合并两个数据框,匹配到账金额=金额,到账日期=交易时间
|
||
df_merged = pd.merge(
|
||
df_case,
|
||
df_bank,
|
||
left_on=["到账金额", "到账日期"],
|
||
right_on=["金额", "交易时间"],
|
||
how="inner",
|
||
)
|
||
|
||
# 计算每个日期和金额组合的出现次数
|
||
case_counts = (
|
||
df_case.groupby(["到账日期", "到账金额"]).size().reset_index(name="case_count")
|
||
)
|
||
bank_counts = (
|
||
df_bank.groupby(["交易时间", "金额"]).size().reset_index(name="bank_count")
|
||
)
|
||
|
||
# 合并统计数据
|
||
df_merged = pd.merge(
|
||
df_merged,
|
||
case_counts,
|
||
left_on=["到账日期", "到账金额"],
|
||
right_on=["到账日期", "到账金额"],
|
||
)
|
||
df_merged = pd.merge(
|
||
df_merged, bank_counts, left_on=["交易时间", "金额"], right_on=["交易时间", "金额"]
|
||
)
|
||
|
||
# 过滤出在同一天没有多笔金额相同的记录(即计数为1)
|
||
task1_result = df_merged[
|
||
(df_merged["case_count"] == 1) & (df_merged["bank_count"] == 1)
|
||
]
|
||
|
||
# 获取任务1中“一案一账户收”和“银行-收入”的索引
|
||
task1_case_indices = df_case[
|
||
df_case[["到账金额", "到账日期"]]
|
||
.apply(tuple, axis=1)
|
||
.isin(task1_result[["到账金额", "到账日期"]].apply(tuple, axis=1))
|
||
].index
|
||
|
||
task1_bank_indices = df_bank[
|
||
df_bank[["金额", "交易时间"]]
|
||
.apply(tuple, axis=1)
|
||
.isin(task1_result[["金额", "交易时间"]].apply(tuple, axis=1))
|
||
].index
|
||
|
||
# 5. 任务2:检索出在某一天有多笔金额相同的记录
|
||
# 找出“一案一账户收”中同一天金额相同且次数大于1的记录
|
||
task2_case = df_case.groupby(["到账日期", "到账金额"]).filter(lambda x: len(x) > 1)
|
||
task2_case_indices = task2_case.index
|
||
|
||
# 找出“银行-收入”中同一天金额相同且次数大于1的记录
|
||
task2_bank = df_bank.groupby(["交易时间", "金额"]).filter(lambda x: len(x) > 1)
|
||
task2_bank_indices = task2_bank.index
|
||
|
||
# 6. 任务3:检索出不符合任务1和任务2的记录
|
||
|
||
# 在“一案一账户收”中,获取不在任务1和任务2中的索引
|
||
task3_case_indices = df_case.index.difference(
|
||
task1_case_indices.union(task2_case_indices)
|
||
)
|
||
task3_case_result = df_case.loc[task3_case_indices]
|
||
|
||
# 在“银行-收入”中,获取不在任务1和任务2中的索引
|
||
task3_bank_indices = df_bank.index.difference(
|
||
task1_bank_indices.union(task2_bank_indices)
|
||
)
|
||
task3_bank_result = df_bank.loc[task3_bank_indices]
|
||
|
||
# 7. 将结果保存到 Excel 文件
|
||
with pd.ExcelWriter("/home/baol/tools/任务结果.xlsx") as writer:
|
||
task1_result.to_excel(writer, sheet_name="任务1", index=False)
|
||
task2_case.to_excel(writer, sheet_name="任务2_一案一账户收", index=False)
|
||
task2_bank.to_excel(writer, sheet_name="任务2_银行收入", index=False)
|
||
task3_case_result.to_excel(writer, sheet_name="任务3_一案一账户收", index=False)
|
||
task3_bank_result.to_excel(writer, sheet_name="任务3_银行收入", index=False)
|
||
|
||
print("所有任务结果已保存到 '任务结果.xlsx' 文件中。")
|