feat: 调整batch;eval;

1f595950 · ccran · d26c53e1 · 1f595950 · 1f595950 · 1f595950
Commit 1f595950 authored Apr 03, 2026 by ccran
10 changed files
--- a/README.md
+++ b/README.md
@@ -179,6 +179,51 @@ LLMConfig:
  model: "你的模型名称"
 ```
+### 📊 评估与回归测试流程
+**目标**：验证规则修改后的效果，对比 F1 值变化。
+**步骤**：
+1. **重启服务**（重新加载 `rules.xlsx`）：
+   ```bash
+   tmux attach -t jp
+   # Ctrl+C 中断当前进程
+   conda activate lufa && python main.py
+   # 按 Ctrl+B 再按 D 脱离（detach）tmux 会话，服务继续在后台运行
+   ```
+2. **运行批处理**（在 `tmux batch` 会话中）：
+   ```bash
+   tmux attach -t batch
+   conda activate lufa
+   python data/batch/batch.py
+   # 等待约 20-30 分钟，直到看到 "all done!"
+   ```
+3. **运行评估**（批处理完成后）：
+   ```bash
+   cd /home/ccran/lufa-contract
+   python data/benchmark/eval.py --datasets-dir data/benchmark/results/jp-output-lufa-<时间戳>
+   ```
+4. **查看结果**：
+   评估结果会生成在 `data/benchmark/results/` 目录下，查看 `.log` 文件中的 F1 值：
+   ```bash
+   grep -E "付款时间审查|运输保险审查|技术性能审查" data/benchmark/results/*.log
+   ```
+**关键指标**：
+- **精确率 (Precision)**：模型识别出的风险中，真正是风险的比例。
+- **召回率 (Recall)**：所有真实风险中，被模型找出的比例。
+- **F1 值**：精确率和召回率的调和平均数，综合评价指标。
+**规则优化策略**：
+- **误报多（Precision 低）**：在规则中增加**反例案例**，明确区分相似但合格的情况。
+- **漏报多（Recall 低）**：在规则中增加**同义词**或**更灵活的匹配模式**。
+- **参考案例**：`data/rules.xlsx` 中的 "案例" 列，用于指导模型判断。
+---
 ## 📄 许可证
 内部使用，保留所有权利。

--- a/__pycache__/main.cpython-312.pyc
+++ b/__pycache__/main.cpython-312.pyc
--- a/core/__pycache__/config.cpython-312.pyc
+++ b/core/__pycache__/config.cpython-312.pyc
--- a/core/__pycache__/memory.cpython-312.pyc
+++ b/core/__pycache__/memory.cpython-312.pyc
--- a/core/config.py
+++ b/core/config.py
@@ -16,8 +16,8 @@ class LLMConfig:
 MERGE_RULE_PROMPT = False
 MAX_SINGLE_CHUNK_SIZE = 5000
 META_KEY = "META"
-DEFAULT_RULESET_ID = "通用"
+DEFAULT_RULESET_ID = "金盘简化"
-ALL_RULESET_IDS = ["通用", "借款", "担保", "财务口", "金盘", "金盘简化"]
+ALL_RULESET_IDS = ["金盘简化"]
 use_lufa = False
 if use_lufa:
    outer_backend_url = "http://znkf.lgfzgroup.com:48081"

--- a/core/tools/__pycache__/retrieve_reference.cpython-312.pyc
+++ b/core/tools/__pycache__/retrieve_reference.cpython-312.pyc
--- a/data/batch/batch.py
+++ b/data/batch/batch.py
@@ -2,6 +2,7 @@
 import os
 import re
 import sys
+import time
 sys.path.append("../..")
 import traceback
@@ -14,7 +15,7 @@ from utils.http_util import upload_file, fastgpt_openai_chat, download_file
 SUFFIX = "_麓发迁移"
 batch_input_dir_path = "jp-input"
-batch_output_dir_path = "jp-output-lufa-new"
+batch_output_dir_path = f"/home/ccran/lufa-contract/data/benchmark/results/jp-output-lufa-{time.strftime('%Y%m%d-%H%M%S', time.localtime())}"
 # SUFFIX = "_麓发"
 # batch_input_dir_path = "lufa-input"
 # batch_output_dir_path = "lufa-output"
@@ -126,4 +127,8 @@ def execute_batch(max_workers: int = 4):
 if __name__ == "__main__":
+    import os
    execute_batch(batch_size)
+    print("all done!")
+    print("文件保存在: ", os.path.abspath(batch_output_dir_path))
--- a/data/benchmark/compare_annotation.py
+++ b/data/benchmark/compare_annotation.py
@@ -3,6 +3,7 @@ from pathlib import Path
 import pandas as pd
 from rapidfuzz import fuzz
 from contextlib import redirect_stdout, redirect_stderr
+import time
 fuzz_score_threshold = 80
@@ -279,7 +280,9 @@ def _compare_impl(val_dir: Path, answer_dir: Path) -> None:
        output_excel = results_dir / f"合同审查结果_{compare_dir_name}.xlsx"
        with pd.ExcelWriter(output_excel, engine="openpyxl") as writer:
            combined_df.to_excel(writer, sheet_name="对比结果", index=False)
-        print(f"Excel written to {output_excel}")
+        print(
+            f"Excel written to {output_excel}.\nEval Time: {time.strftime('%Y-%m-%d %H:%M:%S', time.localtime())}"
+        )
 def compare(val_dir: Path, answer_dir: Path) -> None:

--- a/data/benchmark/eval.py
+++ b/data/benchmark/eval.py
@@ -125,18 +125,6 @@ def _parse_args() -> argparse.Namespace:
        help="Directory containing Word files with annotations.",
    )
    parser.add_argument(
-        "--answer-dir",
-        type=Path,
-        default=base / "审查答案",
-        help="Directory containing labeled answer xlsx files.",
-    )
-    parser.add_argument(
-        "--val-dir",
-        type=Path,
-        default=base / "results" / "jp-output-lufa-extracted",
-        help="Directory to store extracted xlsx files for comparison.",
-    )
-    parser.add_argument(
        "--strip-suffixes",
        nargs="*",
        default=["_麓发改进", "_人机交互", "_麓发迁移"],
@@ -149,10 +137,13 @@ def _parse_args() -> argparse.Namespace:
 if __name__ == "__main__":
+    base = Path(__file__).parent
    args = _parse_args()
    eval(
        datasets_dir=args.datasets_dir,
-        answer_dir=args.answer_dir,
+        answer_dir=base / "审查答案",
-        val_dir=args.val_dir,
+        val_dir=args.datasets_dir.with_name(
+            f"{args.datasets_dir.name}-extract-comment"
+        ),
        strip_suffixes=args.strip_suffixes,
    )
--- a/data/rules.xlsx
+++ b/data/rules.xlsx