feat: 增加触发词逻辑;修改批注逻辑;

3bb9ff31 · ccran · 6b4d3476 · 3bb9ff31 · 3bb9ff31 · 3bb9ff31
Commit 3bb9ff31 authored Mar 25, 2026 by ccran
9 changed files
--- a/core/__pycache__/config.cpython-312.pyc
+++ b/core/__pycache__/config.cpython-312.pyc
--- a/core/config.py
+++ b/core/config.py
@@ -16,7 +16,7 @@ MAX_SINGLE_CHUNK_SIZE=5000
 META_KEY="META"
 DEFAULT_RULESET_ID = "通用"
 ALL_RULESET_IDS = ["通用","借款","担保","财务口","金盘","金盘简化"]
-use_lufa = True
+use_lufa = False
 if use_lufa:
    outer_backend_url = "http://znkf.lgfzgroup.com:48081"
    base_fastgpt_url = "http://192.168.252.71:18089"

--- a/core/tools/segment_rule_router.py
+++ b/core/tools/segment_rule_router.py
@@ -141,16 +141,27 @@ class SegmentRuleRouterTool(LLMTool):
            for item in llm_selected
            if item.get("title")
        }
+        trigger_titles = self._match_trigger_titles(segment_text=segment_text, rules=rules)
+        merged_titles = selected_titles | trigger_titles
-        if not selected_titles:
+        if not merged_titles:
-            return self._fallback_route(segment_text=segment_text, rules=rules)
+            return []
-        title_to_rule = {str(r.get("title", "")).strip(): r for r in rules if r.get("title")}
        routed_rules: List[Dict] = []
-        for title in selected_titles:
+        for rule in rules:
-            rule = title_to_rule.get(title)
+            title = str(rule.get("title", "")).strip()
-            if not rule:
+            if not title or title not in merged_titles:
                continue
+            llm_reason = selected_reasons.get(title, "")
+            trigger_matched = title in trigger_titles
+            if llm_reason and trigger_matched:
+                reason = f"llm+trigger: {llm_reason}"
+            elif llm_reason:
+                reason = llm_reason
+            else:
+                reason = "trigger matched"
            routed_rules.append(
                {
                    "id": rule.get("id", ""),
@@ -158,42 +169,21 @@ class SegmentRuleRouterTool(LLMTool):
                    "level": rule.get("level", ""),
                    "rule": rule.get("rule", ""),
                    "triggers": rule.get("triggers", ""),
-                    "reason": selected_reasons.get(title, ""),
+                    "reason": reason,
                }
            )
-        return routed_rules or self._fallback_route(segment_text=segment_text, rules=rules)
+        return routed_rules
-    def _fallback_route(self, segment_text: str, rules: List[Dict]) -> List[Dict]:
+    def _match_trigger_titles(self, segment_text: str, rules: List[Dict]) -> set[str]:
        text = segment_text or ""
-        routed: List[Dict] = []
+        matched_titles: set[str] = set()
        for r in rules:
            triggers = self._parse_triggers(str(r.get("triggers", "")))
            if triggers and any(t in text for t in triggers):
-                routed.append(
+                title = str(r.get("title", "")).strip()
-                    {
+                if title:
-                        "id": r.get("id", ""),
+                    matched_titles.add(title)
-                        "title": r.get("title", ""),
+        return matched_titles
-                        "level": r.get("level", ""),
-                        "rule": r.get("rule", ""),
-                        "triggers": r.get("triggers", ""),
-                        "reason": "fallback: trigger matched",
-                    }
-                )
-        # 兜底策略：若触发词也未命中，返回全部规则，保证召回不漏审。
-        if not routed:
-            for r in rules:
-                routed.append(
-                    {
-                        "id": r.get("id", ""),
-                        "title": r.get("title", ""),
-                        "level": r.get("level", ""),
-                        "rule": r.get("rule", ""),
-                        "triggers": r.get("triggers", ""),
-                        "reason": "fallback: conservative full recall",
-                    }
-                )
-        return routed
    def _parse_triggers(self, trigger_text: str) -> List[str]:
        parts = re.split(r"[，,、;；\s/|]+", trigger_text or "")

--- a/data/batch/batch.py
+++ b/data/batch/batch.py
@@ -12,9 +12,9 @@ from loguru import logger
 from utils.common_util import random_str
 from utils.http_util import upload_file, fastgpt_openai_chat, download_file
-SUFFIX='_麓发改进'
+SUFFIX='_麓发迁移'
 batch_input_dir_path = 'jp-input'
-batch_output_dir_path = 'jp-output-lufa-simple'
+batch_output_dir_path = 'jp-output-lufa-simple-new'
 batch_size = 5
 # 麓发fastgpt接口
 # url = 'http://192.168.252.71:18089/api/v1/chat/completions'
@@ -24,6 +24,8 @@ url = 'http://192.168.252.71:18088/api/v1/chat/completions'
 # token = 'fastgpt-ek3Z6PxI6sXgYc0jxzZ5bVGqrxwM6aVyfSmA6JVErJYBMr2KmYxrHwEUOIMSYz'
 # 金盘迁移麓发合同审查测试token
 token = 'fastgpt-vykT6qs07g7hR4tL2MNJE6DdNCIxaQjEu3Cxw9nuTBFg8MAG3CkByvnXKxSNEyMK7'
+# 人机交互测试（测试环境）
+# token = 'fastgpt-p189K5zoTX5wjp0dBybFCwsbWm3juIwlJxt2wTGyiaOWOANI5Y10pKEZzyt'
 # 人机交互测试（生产环境）
 # token = 'fastgpt-ry4jIjgNwmNgufMr5jR0ncvJVmSS4GZl4bx2ItsNPoncdQzW9Na3IP1Xrankr'
 # 提取后审查测试

--- a/data/benchmark/eval.py
+++ b/data/benchmark/eval.py
@@ -121,7 +121,7 @@ def _parse_args() -> argparse.Namespace:
 	parser.add_argument(
 		"--datasets-dir",
 		type=Path,
-		default=base / "results" / "jp-output-renji",
+		default=base / "results" / "jp-output-lufa-simple-new",
 		help="Directory containing Word files with annotations.",
 	)
 	parser.add_argument(
@@ -133,13 +133,13 @@ def _parse_args() -> argparse.Namespace:
 	parser.add_argument(
 		"--val-dir",
 		type=Path,
-		default=base / "results" / "jp-output-renji-extracted",
+		default=base / "results" / "jp-output-lufa-simple-new-extracted",
 		help="Directory to store extracted xlsx files for comparison.",
 	)
 	parser.add_argument(
 		"--strip-suffixes",
 		nargs="*",
-		default=['_麓发改进','_人机交互'],
+		default=['_麓发改进','_人机交互','_麓发迁移'],
 		help=(
 			"Optional filename suffixes to strip from generated val xlsx stems before "
 			"comparison, e.g. --strip-suffixes _v1 _审阅版"

--- a/data/rules.xlsx
+++ b/data/rules.xlsx
--- a/utils/__pycache__/spire_word_util.cpython-312.pyc
+++ b/utils/__pycache__/spire_word_util.cpython-312.pyc
--- a/utils/spire_word_util copy.py
+++ b/utils/spire_word_util copy.py
--- a/utils/spire_word_util.py
+++ b/utils/spire_word_util.py