Commit 3bb9ff31 by ccran

feat: 增加触发词逻辑;修改批注逻辑;

parent 6b4d3476
...@@ -16,7 +16,7 @@ MAX_SINGLE_CHUNK_SIZE=5000 ...@@ -16,7 +16,7 @@ MAX_SINGLE_CHUNK_SIZE=5000
META_KEY="META" META_KEY="META"
DEFAULT_RULESET_ID = "通用" DEFAULT_RULESET_ID = "通用"
ALL_RULESET_IDS = ["通用","借款","担保","财务口","金盘","金盘简化"] ALL_RULESET_IDS = ["通用","借款","担保","财务口","金盘","金盘简化"]
use_lufa = True use_lufa = False
if use_lufa: if use_lufa:
outer_backend_url = "http://znkf.lgfzgroup.com:48081" outer_backend_url = "http://znkf.lgfzgroup.com:48081"
base_fastgpt_url = "http://192.168.252.71:18089" base_fastgpt_url = "http://192.168.252.71:18089"
......
...@@ -141,16 +141,27 @@ class SegmentRuleRouterTool(LLMTool): ...@@ -141,16 +141,27 @@ class SegmentRuleRouterTool(LLMTool):
for item in llm_selected for item in llm_selected
if item.get("title") if item.get("title")
} }
trigger_titles = self._match_trigger_titles(segment_text=segment_text, rules=rules)
merged_titles = selected_titles | trigger_titles
if not selected_titles: if not merged_titles:
return self._fallback_route(segment_text=segment_text, rules=rules) return []
title_to_rule = {str(r.get("title", "")).strip(): r for r in rules if r.get("title")}
routed_rules: List[Dict] = [] routed_rules: List[Dict] = []
for title in selected_titles: for rule in rules:
rule = title_to_rule.get(title) title = str(rule.get("title", "")).strip()
if not rule: if not title or title not in merged_titles:
continue continue
llm_reason = selected_reasons.get(title, "")
trigger_matched = title in trigger_titles
if llm_reason and trigger_matched:
reason = f"llm+trigger: {llm_reason}"
elif llm_reason:
reason = llm_reason
else:
reason = "trigger matched"
routed_rules.append( routed_rules.append(
{ {
"id": rule.get("id", ""), "id": rule.get("id", ""),
...@@ -158,42 +169,21 @@ class SegmentRuleRouterTool(LLMTool): ...@@ -158,42 +169,21 @@ class SegmentRuleRouterTool(LLMTool):
"level": rule.get("level", ""), "level": rule.get("level", ""),
"rule": rule.get("rule", ""), "rule": rule.get("rule", ""),
"triggers": rule.get("triggers", ""), "triggers": rule.get("triggers", ""),
"reason": selected_reasons.get(title, ""), "reason": reason,
} }
) )
return routed_rules or self._fallback_route(segment_text=segment_text, rules=rules) return routed_rules
def _fallback_route(self, segment_text: str, rules: List[Dict]) -> List[Dict]: def _match_trigger_titles(self, segment_text: str, rules: List[Dict]) -> set[str]:
text = segment_text or "" text = segment_text or ""
routed: List[Dict] = [] matched_titles: set[str] = set()
for r in rules: for r in rules:
triggers = self._parse_triggers(str(r.get("triggers", ""))) triggers = self._parse_triggers(str(r.get("triggers", "")))
if triggers and any(t in text for t in triggers): if triggers and any(t in text for t in triggers):
routed.append( title = str(r.get("title", "")).strip()
{ if title:
"id": r.get("id", ""), matched_titles.add(title)
"title": r.get("title", ""), return matched_titles
"level": r.get("level", ""),
"rule": r.get("rule", ""),
"triggers": r.get("triggers", ""),
"reason": "fallback: trigger matched",
}
)
# 兜底策略:若触发词也未命中,返回全部规则,保证召回不漏审。
if not routed:
for r in rules:
routed.append(
{
"id": r.get("id", ""),
"title": r.get("title", ""),
"level": r.get("level", ""),
"rule": r.get("rule", ""),
"triggers": r.get("triggers", ""),
"reason": "fallback: conservative full recall",
}
)
return routed
def _parse_triggers(self, trigger_text: str) -> List[str]: def _parse_triggers(self, trigger_text: str) -> List[str]:
parts = re.split(r"[,,、;;\s/|]+", trigger_text or "") parts = re.split(r"[,,、;;\s/|]+", trigger_text or "")
......
...@@ -12,9 +12,9 @@ from loguru import logger ...@@ -12,9 +12,9 @@ from loguru import logger
from utils.common_util import random_str from utils.common_util import random_str
from utils.http_util import upload_file, fastgpt_openai_chat, download_file from utils.http_util import upload_file, fastgpt_openai_chat, download_file
SUFFIX='_麓发改进' SUFFIX='_麓发迁移'
batch_input_dir_path = 'jp-input' batch_input_dir_path = 'jp-input'
batch_output_dir_path = 'jp-output-lufa-simple' batch_output_dir_path = 'jp-output-lufa-simple-new'
batch_size = 5 batch_size = 5
# 麓发fastgpt接口 # 麓发fastgpt接口
# url = 'http://192.168.252.71:18089/api/v1/chat/completions' # url = 'http://192.168.252.71:18089/api/v1/chat/completions'
...@@ -24,6 +24,8 @@ url = 'http://192.168.252.71:18088/api/v1/chat/completions' ...@@ -24,6 +24,8 @@ url = 'http://192.168.252.71:18088/api/v1/chat/completions'
# token = 'fastgpt-ek3Z6PxI6sXgYc0jxzZ5bVGqrxwM6aVyfSmA6JVErJYBMr2KmYxrHwEUOIMSYz' # token = 'fastgpt-ek3Z6PxI6sXgYc0jxzZ5bVGqrxwM6aVyfSmA6JVErJYBMr2KmYxrHwEUOIMSYz'
# 金盘迁移麓发合同审查测试token # 金盘迁移麓发合同审查测试token
token = 'fastgpt-vykT6qs07g7hR4tL2MNJE6DdNCIxaQjEu3Cxw9nuTBFg8MAG3CkByvnXKxSNEyMK7' token = 'fastgpt-vykT6qs07g7hR4tL2MNJE6DdNCIxaQjEu3Cxw9nuTBFg8MAG3CkByvnXKxSNEyMK7'
# 人机交互测试(测试环境)
# token = 'fastgpt-p189K5zoTX5wjp0dBybFCwsbWm3juIwlJxt2wTGyiaOWOANI5Y10pKEZzyt'
# 人机交互测试(生产环境) # 人机交互测试(生产环境)
# token = 'fastgpt-ry4jIjgNwmNgufMr5jR0ncvJVmSS4GZl4bx2ItsNPoncdQzW9Na3IP1Xrankr' # token = 'fastgpt-ry4jIjgNwmNgufMr5jR0ncvJVmSS4GZl4bx2ItsNPoncdQzW9Na3IP1Xrankr'
# 提取后审查测试 # 提取后审查测试
......
...@@ -121,7 +121,7 @@ def _parse_args() -> argparse.Namespace: ...@@ -121,7 +121,7 @@ def _parse_args() -> argparse.Namespace:
parser.add_argument( parser.add_argument(
"--datasets-dir", "--datasets-dir",
type=Path, type=Path,
default=base / "results" / "jp-output-renji", default=base / "results" / "jp-output-lufa-simple-new",
help="Directory containing Word files with annotations.", help="Directory containing Word files with annotations.",
) )
parser.add_argument( parser.add_argument(
...@@ -133,13 +133,13 @@ def _parse_args() -> argparse.Namespace: ...@@ -133,13 +133,13 @@ def _parse_args() -> argparse.Namespace:
parser.add_argument( parser.add_argument(
"--val-dir", "--val-dir",
type=Path, type=Path,
default=base / "results" / "jp-output-renji-extracted", default=base / "results" / "jp-output-lufa-simple-new-extracted",
help="Directory to store extracted xlsx files for comparison.", help="Directory to store extracted xlsx files for comparison.",
) )
parser.add_argument( parser.add_argument(
"--strip-suffixes", "--strip-suffixes",
nargs="*", nargs="*",
default=['_麓发改进','_人机交互'], default=['_麓发改进','_人机交互','_麓发迁移'],
help=( help=(
"Optional filename suffixes to strip from generated val xlsx stems before " "Optional filename suffixes to strip from generated val xlsx stems before "
"comparison, e.g. --strip-suffixes _v1 _审阅版" "comparison, e.g. --strip-suffixes _v1 _审阅版"
......
No preview for this file type
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment