feat: 技术性能77%

2347d107 · ccran · 1f595950 · 2347d107 · 2347d107 · 2347d107
Commit 2347d107 authored Apr 08, 2026 by ccran
13 changed files
--- a/__pycache__/main.cpython-312.pyc
+++ b/__pycache__/main.cpython-312.pyc
--- a/core/__pycache__/config.cpython-312.pyc
+++ b/core/__pycache__/config.cpython-312.pyc
--- a/core/__pycache__/memory.cpython-312.pyc
+++ b/core/__pycache__/memory.cpython-312.pyc
--- a/core/config.py
+++ b/core/config.py
@@ -16,8 +16,9 @@ class LLMConfig:
 MERGE_RULE_PROMPT = False
 MAX_SINGLE_CHUNK_SIZE = 5000
 META_KEY = "META"
-DEFAULT_RULESET_ID = "金盘简化"
-ALL_RULESET_IDS = ["金盘简化"]
+DEFAULT_RULESET_ID = "通用"
+ALL_RULESET_IDS = ["通用", "借款", "担保", "财务口", "金盘", "金盘简化", "麓发测试"]
+
 use_lufa = False
 if use_lufa:
    outer_backend_url = "http://znkf.lgfzgroup.com:48081"

--- a/core/tools/__pycache__/segment_review.cpython-312.pyc
+++ b/core/tools/__pycache__/segment_review.cpython-312.pyc
--- a/core/tools/segment_merger.py
+++ b/core/tools/segment_merger.py
--- a/core/tools/segment_review.py
+++ b/core/tools/segment_review.py
--- a/data/batch/batch.py
+++ b/data/batch/batch.py
@@ -16,9 +16,10 @@ from utils.http_util import upload_file, fastgpt_openai_chat, download_file
 SUFFIX = "_麓发迁移"
 batch_input_dir_path = "jp-input"
 batch_output_dir_path = f"/home/ccran/lufa-contract/data/benchmark/results/jp-output-lufa-{time.strftime('%Y%m%d-%H%M%S', time.localtime())}"
+
 # SUFFIX = "_麓发"
 # batch_input_dir_path = "lufa-input"
-# batch_output_dir_path = "lufa-output"
+# batch_output_dir_path = "lufa-output-standard"
 batch_size = 5
 # 麓发fastgpt接口
 # url = "http://192.168.252.71:18089/api/v1/chat/completions"
@@ -26,6 +27,8 @@ batch_size = 5
 url = "http://192.168.252.71:18088/api/v1/chat/completions"
 # 麓发合同审查生产token
 # token = "fastgpt-ek3Z6PxI6sXgYc0jxzZ5bVGqrxwM6aVyfSmA6JVErJYBMr2KmYxrHwEUOIMSYz"
+# 麓发合同审查生产token-标准化
+# token = "fastgpt-mg5tQUgreJeF7peoOr5zqP0NR4EIrfS2bEVXge6FUL94Suu1TvEMR1sGNRSiV"
 # 金盘迁移麓发合同审查测试token
 token = "fastgpt-vykT6qs07g7hR4tL2MNJE6DdNCIxaQjEu3Cxw9nuTBFg8MAG3CkByvnXKxSNEyMK7"
 # 人机交互测试（测试环境）

--- a/data/benchmark/eval.py
+++ b/data/benchmark/eval.py
@@ -121,7 +121,7 @@ def _parse_args() -> argparse.Namespace:
    parser.add_argument(
        "--datasets-dir",
        type=Path,
-        default=base / "results" / "jp-output-lufa",
+        default=base / "results" / "jp-output-lufa-20260408-182708",
        help="Directory containing Word files with annotations.",
    )
    parser.add_argument(

--- a/data/rules.xlsx
+++ b/data/rules.xlsx
--- a/demo/spire_doc_revision_compare_demo.py
+++ b/demo/spire_doc_revision_compare_demo.py
+from spire.doc import *
+import difflib
+
+
+# Write text to a file.
+def WriteAllText(fname: str, text: str) -> None:
+    """Write ``text`` to the file at ``fname``, encoded as UTF-8.
+
+    The file is opened in ``"w"`` mode, so existing content is replaced.
+    """
+    with open(fname, mode="w", encoding="utf-8") as out_file:
+        out_file.write(text)
+
+
+inputFile = "/home/ccran/lufa-contract/demo/修订测试.docx"
+
+# Create the Document object.
+doc = Document()
+
+# Everything after construction runs under try/finally so the document is
+# closed even when loading, accepting revisions, or diffing raises.
+try:
+    # Load the Word document and capture its text before revisions are accepted.
+    doc.LoadFromFile(inputFile)
+    before_text = doc.GetText()
+    print("===== 修订前文本 =====")
+    print(before_text)
+
+    # Accept all revisions, but only if the document reports pending changes.
+    if doc.HasChanges:
+        doc.AcceptChanges()
+
+    # Capture the text again now that revisions (if any) have been accepted.
+    after_text = doc.GetText()
+    print("===== 修订后文本 =====")
+    print(after_text)
+
+    before_lines = before_text.splitlines()
+    after_lines = after_text.splitlines()
+
+    # splitlines() drops the line endings, so lineterm="" keeps the diff
+    # control lines free of trailing newlines as well.
+    diff_lines = list(
+        difflib.unified_diff(
+            before_lines,
+            after_lines,
+            fromfile="before_revision",
+            tofile="after_revision",
+            lineterm="",
+        )
+    )
+
+    print("===== Diff 对比结果 =====")
+    if diff_lines:
+        print("\n".join(diff_lines))
+    else:
+        print("修订前后文本一致，无差异。")
+finally:
+    # Release the document on both the success and the failure path.
+    doc.Close()
--- a/demo/spire_doc_revision_demo.py
+++ b/demo/spire_doc_revision_demo.py
+from spire.doc import *
+
+
+# Write text to a file.
+def WriteAllText(fname: str, text: str) -> None:
+    """Write ``text`` to the file at ``fname``, encoded as UTF-8.
+
+    The file is opened in ``"w"`` mode, so existing content is replaced.
+    """
+    with open(fname, mode="w", encoding="utf-8") as out_file:
+        out_file.write(text)
+
+
+inputFile = "/home/ccran/lufa-contract/demo/修订测试.docx"
+outputFile1 = "/home/ccran/lufa-contract/demo/新增修订.txt"
+outputFile2 = "/home/ccran/lufa-contract/demo/删除修订.txt"
+
+
+def _format_revision(text_label, revision, text, trailer="\n"):
+    """Return the three-line report entry for one revision.
+
+    Args:
+        text_label: Label for the content line ("Insertion Text" or
+            "Deletion Text").
+        revision: Revision object; its ``Type.name`` and ``Author`` are read.
+        text: Text of the paragraph or text range carrying the revision.
+        trailer: String appended after the content line.
+    """
+    return (
+        f"Revision Type: {revision.Type.name}\n"
+        f"Revision Author: {revision.Author}\n"
+        f"{text_label}: {text}{trailer}"
+    )
+
+
+# Create the Document object.
+document = Document()
+
+# Collected report entries, one string per revision.
+insert_revisions = []
+delete_revisions = []
+
+# Everything after construction runs under try/finally so the document is
+# disposed even when loading, traversal, or writing the reports raises.
+try:
+    # Load the Word document and echo its text.
+    document.LoadFromFile(inputFile)
+    print(document.GetText())
+
+    # Walk every section of the document.
+    for k in range(document.Sections.Count):
+        sec = document.Sections.get_Item(k)
+
+        # Walk the body elements of the section; only paragraphs are inspected.
+        for m in range(sec.Body.ChildObjects.Count):
+            docItem = sec.Body.ChildObjects.get_Item(m)
+            if not isinstance(docItem, Paragraph):
+                continue
+            para = docItem
+
+            if para.IsInsertRevision:
+                # The paragraph itself is an insertion revision; these entries
+                # are followed by a blank line.
+                insert_revisions.append(
+                    _format_revision(
+                        "Insertion Text", para.InsertRevision, para.Text, "\n\n"
+                    )
+                )
+            elif para.IsDeleteRevision:
+                # The paragraph itself is a deletion revision.
+                delete_revisions.append(
+                    _format_revision(
+                        "Deletion Text", para.DeleteRevision, para.Text, "\n\n"
+                    )
+                )
+            else:
+                # The paragraph carries no revision of its own, so inspect the
+                # text ranges inside it.
+                for j in range(para.ChildObjects.Count):
+                    obj = para.ChildObjects.get_Item(j)
+                    if not isinstance(obj, TextRange):
+                        continue
+                    textRange = obj
+
+                    if textRange.IsInsertRevision:
+                        insert_revisions.append(
+                            _format_revision(
+                                "Insertion Text",
+                                textRange.InsertRevision,
+                                textRange.Text,
+                            )
+                        )
+                    elif textRange.IsDeleteRevision:
+                        delete_revisions.append(
+                            _format_revision(
+                                "Deletion Text",
+                                textRange.DeleteRevision,
+                                textRange.Text,
+                            )
+                        )
+
+    # Write the collected revision reports to their files.
+    WriteAllText(outputFile1, "".join(insert_revisions))
+    WriteAllText(outputFile2, "".join(delete_revisions))
+finally:
+    # Release resources on both the success and the failure path.
+    document.Dispose()
--- a/main.py
+++ b/main.py
@@ -394,7 +394,9 @@ def merge_segment_findings(payload: MergerRequest) -> MergerResponse:
    unqualified_findings = [
        f for f in segment_findings if (f.result or "").strip() == "不合格"
    ]
-    merged_result = merger_tool.run([f.__dict__ for f in unqualified_findings])
+    merged_result = merger_tool.run(
+        [f.__dict__ for f in unqualified_findings], merge_mode="rule"
+    )
    merged_findings = merged_result.get("findings", []) or []

    for f in merged_findings: