Commit 1f595950 by ccran

feat: 调整batch;eval;

parent d26c53e1
...@@ -179,6 +179,51 @@ LLMConfig: ...@@ -179,6 +179,51 @@ LLMConfig:
model: "你的模型名称" model: "你的模型名称"
``` ```
### 📊 评估与回归测试流程
**目标**:验证规则修改后的效果,对比 F1 值变化。
**步骤**
1. **重启服务**(重新加载 `rules.xlsx`):
```bash
tmux attach -t jp
# Ctrl+C 中断当前进程
conda activate lufa && python main.py
# Ctrl+D 退出 tmux
```
2. **运行批处理**(在 `tmux batch` 会话中):
```bash
tmux attach -t batch
conda activate lufa
python data/batch/batch.py
# 等待约 20-30 分钟,直到看到 "all done!"
```
3. **运行评估**(批处理完成后):
```bash
cd /home/ccran/lufa-contract
python data/benchmark/eval.py --datasets-dir jp-output-lufa
```
4. **查看结果**
评估结果会生成在 `data/benchmark/results/` 目录下,查看 `.log` 文件中的 F1 值:
```bash
grep -E "付款时间审查|运输保险审查|技术性能审查" data/benchmark/results/*.log
```
**关键指标**
- **准确率 (Precision)**:模型识别出的风险中,真正是风险的比例。
- **召回率 (Recall)**:所有真实风险中,被模型找出的比例。
- **F1 值**:准确率和召回率的调和平均数,综合评价指标。
**规则优化策略**
- **误报多(Precision 低)**:在规则中增加**反例案例**,明确区分相似但合格的情况。
- **漏报多(Recall 低)**:在规则中增加**同义词**或**更灵活的匹配模式**。
- **参考案例**:`data/rules.xlsx` 中的 "案例" 列,用于指导模型判断。
---
## 📄 许可证 ## 📄 许可证
内部使用,保留所有权利。 内部使用,保留所有权利。
......
...@@ -16,8 +16,8 @@ class LLMConfig: ...@@ -16,8 +16,8 @@ class LLMConfig:
MERGE_RULE_PROMPT = False MERGE_RULE_PROMPT = False
MAX_SINGLE_CHUNK_SIZE = 5000 MAX_SINGLE_CHUNK_SIZE = 5000
META_KEY = "META" META_KEY = "META"
DEFAULT_RULESET_ID = "通用" DEFAULT_RULESET_ID = "金盘简化"
ALL_RULESET_IDS = ["通用", "借款", "担保", "财务口", "金盘", "金盘简化"] ALL_RULESET_IDS = ["金盘简化"]
use_lufa = False use_lufa = False
if use_lufa: if use_lufa:
outer_backend_url = "http://znkf.lgfzgroup.com:48081" outer_backend_url = "http://znkf.lgfzgroup.com:48081"
......
...@@ -2,6 +2,7 @@ ...@@ -2,6 +2,7 @@
import os import os
import re import re
import sys import sys
import time
sys.path.append("../..") sys.path.append("../..")
import traceback import traceback
...@@ -14,7 +15,7 @@ from utils.http_util import upload_file, fastgpt_openai_chat, download_file ...@@ -14,7 +15,7 @@ from utils.http_util import upload_file, fastgpt_openai_chat, download_file
SUFFIX = "_麓发迁移" SUFFIX = "_麓发迁移"
batch_input_dir_path = "jp-input" batch_input_dir_path = "jp-input"
batch_output_dir_path = "jp-output-lufa-new" batch_output_dir_path = f"/home/ccran/lufa-contract/data/benchmark/results/jp-output-lufa-{time.strftime('%Y%m%d-%H%M%S', time.localtime())}"
# SUFFIX = "_麓发" # SUFFIX = "_麓发"
# batch_input_dir_path = "lufa-input" # batch_input_dir_path = "lufa-input"
# batch_output_dir_path = "lufa-output" # batch_output_dir_path = "lufa-output"
...@@ -126,4 +127,8 @@ def execute_batch(max_workers: int = 4): ...@@ -126,4 +127,8 @@ def execute_batch(max_workers: int = 4):
if __name__ == "__main__": if __name__ == "__main__":
import os
execute_batch(batch_size) execute_batch(batch_size)
print("all done!")
print("文件保存在: ", os.path.abspath(batch_output_dir_path))
...@@ -3,6 +3,7 @@ from pathlib import Path ...@@ -3,6 +3,7 @@ from pathlib import Path
import pandas as pd import pandas as pd
from rapidfuzz import fuzz from rapidfuzz import fuzz
from contextlib import redirect_stdout, redirect_stderr from contextlib import redirect_stdout, redirect_stderr
import time
fuzz_score_threshold = 80 fuzz_score_threshold = 80
...@@ -279,7 +280,9 @@ def _compare_impl(val_dir: Path, answer_dir: Path) -> None: ...@@ -279,7 +280,9 @@ def _compare_impl(val_dir: Path, answer_dir: Path) -> None:
output_excel = results_dir / f"合同审查结果_{compare_dir_name}.xlsx" output_excel = results_dir / f"合同审查结果_{compare_dir_name}.xlsx"
with pd.ExcelWriter(output_excel, engine="openpyxl") as writer: with pd.ExcelWriter(output_excel, engine="openpyxl") as writer:
combined_df.to_excel(writer, sheet_name="对比结果", index=False) combined_df.to_excel(writer, sheet_name="对比结果", index=False)
print(f"Excel written to {output_excel}") print(
f"Excel written to {output_excel}.\nEval Time: {time.strftime('%Y-%m-%d %H:%M:%S', time.localtime())}"
)
def compare(val_dir: Path, answer_dir: Path) -> None: def compare(val_dir: Path, answer_dir: Path) -> None:
......
...@@ -125,18 +125,6 @@ def _parse_args() -> argparse.Namespace: ...@@ -125,18 +125,6 @@ def _parse_args() -> argparse.Namespace:
help="Directory containing Word files with annotations.", help="Directory containing Word files with annotations.",
) )
parser.add_argument( parser.add_argument(
"--answer-dir",
type=Path,
default=base / "审查答案",
help="Directory containing labeled answer xlsx files.",
)
parser.add_argument(
"--val-dir",
type=Path,
default=base / "results" / "jp-output-lufa-extracted",
help="Directory to store extracted xlsx files for comparison.",
)
parser.add_argument(
"--strip-suffixes", "--strip-suffixes",
nargs="*", nargs="*",
default=["_麓发改进", "_人机交互", "_麓发迁移"], default=["_麓发改进", "_人机交互", "_麓发迁移"],
...@@ -149,10 +137,13 @@ def _parse_args() -> argparse.Namespace: ...@@ -149,10 +137,13 @@ def _parse_args() -> argparse.Namespace:
if __name__ == "__main__": if __name__ == "__main__":
base = Path(__file__).parent
args = _parse_args() args = _parse_args()
eval( eval(
datasets_dir=args.datasets_dir, datasets_dir=args.datasets_dir,
answer_dir=args.answer_dir, answer_dir=base / "审查答案",
val_dir=args.val_dir, val_dir=args.datasets_dir.with_name(
f"{args.datasets_dir.name}-extract-comment"
),
strip_suffixes=args.strip_suffixes, strip_suffixes=args.strip_suffixes,
) )
No preview for this file type
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment