Commit 461d5ea7 by ccran

feat: 增加修订对比;

parent 3ac7da45
from spire.doc import *
import argparse
import difflib
from itertools import zip_longest
from pathlib import Path
from openpyxl import Workbook
from openpyxl.styles import Alignment, Font
# Write text to a file.
def WriteAllText(fname: str, text: str):
    """Create or overwrite ``fname`` with ``text``, encoded as UTF-8.

    Args:
        fname: Path of the file to write.
        text: Content to store in the file.
    """
    Path(fname).write_text(text, encoding="utf-8")
def parse_args(argv: "list[str] | None" = None) -> argparse.Namespace:
    """Parse the command-line options of the revision-compare tool.

    Args:
        argv: Argument strings to parse. ``None`` (the default) lets argparse
            read ``sys.argv[1:]``, so existing zero-argument callers behave
            exactly as before; passing a list allows programmatic use.

    Returns:
        Namespace with ``input_file`` (str), ``output`` (str or ``None``),
        ``show_text`` (bool) and ``no_diff_print`` (bool).
    """
    parser = argparse.ArgumentParser(
        description="Compare Word revision text and write diff rows to Excel."
    )
    # NOTE(review): the default is a developer-specific Windows path; it is
    # kept unchanged for backward compatibility, but callers should normally
    # pass --input_file explicitly.
    parser.add_argument(
        "--input_file",
        default=r'D:\VsProject\lufa-contract\demo\5、[修订后复审版]麓谷发展集团2026-2027年质量.docx',
        help="Path to the Word document with revisions.",
    )
    parser.add_argument(
        "-o",
        "--output",
        help="Path to the output Excel file. Defaults to '<input_stem>_修订对比.xlsx'.",
    )
    parser.add_argument(
        "--show-text",
        action="store_true",
        help="Print full text before and after accepting revisions.",
    )
    parser.add_argument(
        "--no-diff-print",
        action="store_true",
        help="Do not print unified diff to the console.",
    )
    return parser.parse_args(argv)
def get_default_output_file(input_file: Path) -> Path:
    """Return the default Excel output path for ``input_file``.

    The result lives next to the input document and is named after the
    input's stem with the suffix ``_修订对比.xlsx`` appended.
    """
    output_name = input_file.stem + "_修订对比.xlsx"
    return input_file.with_name(output_name)
def read_revision_texts(input_file: Path) -> tuple[str, str]:
    """Read a document's text before and after accepting its changes.

    The document is loaded once; its text is captured, ``AcceptChanges()`` is
    called when ``HasChanges`` is truthy, and the text is captured again.

    Args:
        input_file: Path of the document to load.

    Returns:
        ``(text_before_accepting, text_after_accepting)``. Both values are
        the same when the document reports no changes.
    """
    from spire.doc import Document

    document = Document()
    try:
        document.LoadFromFile(str(input_file))
        original_text = document.GetText()
        if document.HasChanges:
            document.AcceptChanges()
        return original_text, document.GetText()
    finally:
        # Always release the document, even if loading or reading fails.
        document.Close()
def build_unified_diff(before_lines: list[str], after_lines: list[str]) -> list[str]:
    """Return the unified diff between two line lists as a list of strings.

    The two sides are labelled ``before_revision`` and ``after_revision``.
    Generated lines carry no trailing newline (``lineterm=""``), so the
    result can be joined with ``"\\n"`` for display. Identical inputs yield
    an empty list.
    """
    labels = {"fromfile": "before_revision", "tofile": "after_revision"}
    diff_iter = difflib.unified_diff(before_lines, after_lines, lineterm="", **labels)
    return [*diff_iter]
def build_diff_rows(before_lines: list[str], after_lines: list[str]) -> list[tuple[str, str]]:
    """Pair up the lines that differ between two line lists.

    Each changed region reported by ``difflib.SequenceMatcher`` becomes one
    or more ``(before, after)`` rows; unchanged regions are skipped. When the
    two sides of a region have different lengths (including pure deletions
    and pure insertions, where one side is empty), the shorter side is
    padded with ``""``.

    Returns:
        List of ``(before_line, after_line)`` tuples in document order.
    """
    diff_rows: list[tuple[str, str]] = []
    opcodes = difflib.SequenceMatcher(None, before_lines, after_lines).get_opcodes()
    for tag, a_lo, a_hi, b_lo, b_hi in opcodes:
        if tag not in ("replace", "delete", "insert"):
            continue
        # For "delete"/"insert" one slice is empty, so padding with ""
        # produces the one-sided rows directly.
        removed = before_lines[a_lo:a_hi]
        added = after_lines[b_lo:b_hi]
        diff_rows.extend(zip_longest(removed, added, fillvalue=""))
    return diff_rows
def write_diff_rows_to_excel(diff_rows: list[tuple[str, str]], output_file: Path) -> None:
    """Save the before/after diff rows as a two-column Excel sheet.

    The sheet is titled ``revision_compare`` and starts with a bold, centred
    header row that stays frozen while scrolling. Every data cell wraps its
    text and is top-aligned; non-empty cells in the second column use font
    colour ``FF0000``.

    Args:
        diff_rows: ``(before_line, after_line)`` pairs, one per sheet row.
        output_file: Destination path handed to ``Workbook.save``.
    """
    workbook = Workbook()
    sheet = workbook.active
    sheet.title = "revision_compare"

    # Header row.
    sheet.append(["修订前", "修订后"])
    for header_cell in sheet[1]:
        header_cell.font = Font(bold=True)
        header_cell.alignment = Alignment(horizontal="center", vertical="center")

    # Data rows, in the order given.
    for before_line, after_line in diff_rows:
        sheet.append([before_line, after_line])

    for data_row in sheet.iter_rows(min_row=2):
        for data_cell in data_row:
            data_cell.alignment = Alignment(wrap_text=True, vertical="top")
        # Highlight the "after" column only when it actually has content.
        revised_cell = data_row[1]
        if revised_cell.value:
            revised_cell.font = Font(color="FF0000")

    for column_letter in ("A", "B"):
        sheet.column_dimensions[column_letter].width = 80
    sheet.freeze_panes = "A2"  # keep the header visible while scrolling
    workbook.save(output_file)
def compare_revisions(input_file: Path, output_file: Path, show_text: bool, print_diff: bool) -> bool:
    """Compare a document's text before and after accepting its revisions.

    The two texts come from ``read_revision_texts(input_file)``. When they
    differ, the changed lines are written to ``output_file`` through
    ``write_diff_rows_to_excel``.

    Args:
        input_file: Word document to inspect.
        output_file: Excel file that receives the diff rows.
        show_text: Print both full texts to the console when true.
        print_diff: Print the unified diff to the console when true.

    Returns:
        ``True`` if differences were found and the Excel file was written,
        ``False`` if there was nothing to write.
    """
    # The document is read (and closed) exactly once by the helper; the
    # texts it returns are the single source of truth for everything below.
    before_text, after_text = read_revision_texts(input_file)

    if show_text:
        print("===== 修订前文本 =====")
        print(before_text)
        print("===== 修订后文本 =====")
        print(after_text)

    if before_text == after_text:
        print("修订前后文本一致,无差异,不写入 Excel。")
        return False

    before_lines = before_text.splitlines()
    after_lines = after_text.splitlines()
    diff_rows = build_diff_rows(before_lines, after_lines)

    # The raw texts may differ only in line terminators, which splitlines()
    # discards; in that case there is no line-level change to report.
    if not diff_rows:
        print("没有检测到真实差异,不写入 Excel。")
        return False

    if print_diff:
        print("===== Diff 对比结果 =====")
        print("\n".join(build_unified_diff(before_lines, after_lines)))

    write_diff_rows_to_excel(diff_rows, output_file)
    print(f"Excel Diff 对比结果已写入: {output_file}")
    return True


def main() -> None:
    """Command-line entry point: parse the options and run the comparison."""
    args = parse_args()
    input_file = Path(args.input_file)
    # Fall back to '<input_stem>_修订对比.xlsx' next to the input document.
    output_file = Path(args.output) if args.output else get_default_output_file(input_file)
    compare_revisions(
        input_file=input_file,
        output_file=output_file,
        show_text=args.show_text,
        print_diff=not args.no_diff_print,
    )


if __name__ == "__main__":
    main()
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment