Commit 49b473ef by ccran

feat: 新增摘要项;

parent e48b2cd5
...@@ -24,6 +24,7 @@ MERGE_RULE_PROMPT = False ...@@ -24,6 +24,7 @@ MERGE_RULE_PROMPT = False
MAX_SINGLE_CHUNK_SIZE = 5000 MAX_SINGLE_CHUNK_SIZE = 5000
META_KEY = "META" META_KEY = "META"
DEFAULT_RULESET_ID = "通用" DEFAULT_RULESET_ID = "通用"
## 规则集ID列表,需与rules.xlsx中的sheet名称保持一致!!!
ALL_RULESET_IDS = [ ALL_RULESET_IDS = [
"通用", "通用",
"借款", "借款",
...@@ -33,6 +34,7 @@ ALL_RULESET_IDS = [ ...@@ -33,6 +34,7 @@ ALL_RULESET_IDS = [
"金盘简化", "金盘简化",
"麓发测试", "麓发测试",
"麓发标准", "麓发标准",
"金盘B类"
] ]
MAX_WORKERS = 10 MAX_WORKERS = 10
FILE_SUFFIX = "-审核批注" FILE_SUFFIX = "-审核批注"
...@@ -43,6 +45,7 @@ use_lufa = False ...@@ -43,6 +45,7 @@ use_lufa = False
use_jp_machine = True use_jp_machine = True
## 关键参数** ## 关键参数**
ocr_url = 'http://192.168.252.71:8202/openapi/ocrUploadFile'
if use_lufa: if use_lufa:
outer_backend_url = "http://znkf.lgfzgroup.com:48081" outer_backend_url = "http://znkf.lgfzgroup.com:48081"
base_fastgpt_url = "http://192.168.252.71:18089" base_fastgpt_url = "http://192.168.252.71:18089"
...@@ -62,6 +65,7 @@ else: ...@@ -62,6 +65,7 @@ else:
outer_backend_url = "http://172.21.107.45:48080" outer_backend_url = "http://172.21.107.45:48080"
base_fastgpt_url = "http://172.21.107.45:3030" base_fastgpt_url = "http://172.21.107.45:3030"
base_backend_url = "http://172.21.107.45:48080" base_backend_url = "http://172.21.107.45:48080"
ocr_url = "http://172.21.107.45:8202/openapi/ocrUploadFile"
segment_review_api_key = ( segment_review_api_key = (
"fastgpt-vLu2JHAfqwEq5FUQhvATFDK0yDS6fs804v7KwWBMyU4sRrHzh4UGl89Zpa" "fastgpt-vLu2JHAfqwEq5FUQhvATFDK0yDS6fs804v7KwWBMyU4sRrHzh4UGl89Zpa"
) )
......
...@@ -347,13 +347,13 @@ class MemoryStore: ...@@ -347,13 +347,13 @@ class MemoryStore:
ws_facts.append(["元信息", "事实内容"]) ws_facts.append(["元信息", "事实内容"])
for item in self.facts: for item in self.facts:
if not isinstance(item, dict): if not isinstance(item, dict):
ws_facts.append(["事实", json.dumps(item, ensure_ascii=False)]) ws_facts.append(["事实", self._format_summary_for_export(item)])
continue continue
meta_info = item.get(META_KEY, None) meta_info = item.get(META_KEY, None)
ws_facts.append( ws_facts.append(
[ [
json.dumps(meta_info, ensure_ascii=False), json.dumps(meta_info, ensure_ascii=False),
json.dumps(item, ensure_ascii=False), self._format_summary_for_export(item),
] ]
) )
else: else:
...@@ -443,6 +443,43 @@ class MemoryStore: ...@@ -443,6 +443,43 @@ class MemoryStore:
return safe[:31] return safe[:31]
@staticmethod @staticmethod
def _format_summary_for_export(value: Any, level: int = 0) -> str:
    """Render a nested summary value as newline-joined plain text.

    Dicts become ``key:value`` lines (the ``META_KEY`` entry is skipped);
    a dict/list child is written as a ``key:`` header followed by the child
    rendered one level deeper. List elements are rendered at the current
    level. Anything else is rendered as a single scalar line.

    Args:
        value: The dict, list or scalar to render.
        level: Nesting depth; the line prefix is ``" " * level``.

    Returns:
        The rendered text, lines joined with ``"\\n"``.
    """
    pad = " " * level

    # Leaf value: one prefixed line.
    if not isinstance(value, (dict, list)):
        return f"{pad}{MemoryStore._format_scalar(value)}"

    out: List[str] = []

    if isinstance(value, dict):
        for name, child in value.items():
            if name == META_KEY:
                continue
            if not isinstance(child, (dict, list)):
                out.append(f"{pad}{name!s}:{MemoryStore._format_scalar(child)}")
                continue
            # Container child: header line, then the nested rendering
            # (omitted when the container renders to nothing).
            out.append(f"{pad}{name!s}:")
            nested = MemoryStore._format_summary_for_export(child, level + 1)
            if nested:
                out.append(nested)
        return "\n".join(out)

    for element in value:
        if not isinstance(element, (dict, list)):
            out.append(f"{pad}{MemoryStore._format_scalar(element)}")
            continue
        # Nested containers inside a list stay at the same depth.
        nested = MemoryStore._format_summary_for_export(element, level)
        if nested:
            out.append(nested)
    return "\n".join(out)
@staticmethod
def _format_scalar(value: Any) -> str:
if value is None:
return ""
return str(value)
@staticmethod
def _normalize_finding_key(key: str) -> str: def _normalize_finding_key(key: str) -> str:
normalized = (key or "").strip().lower() normalized = (key or "").strip().lower()
if not normalized: if not normalized:
......
...@@ -19,9 +19,9 @@ class LLMTool(ToolBase): ...@@ -19,9 +19,9 @@ class LLMTool(ToolBase):
self.system_prompt = system_prompt self.system_prompt = system_prompt
self.llm = OpenAITool(LLM[llm_key], max_workers=MAX_WORKERS) self.llm = OpenAITool(LLM[llm_key], max_workers=MAX_WORKERS)
def build_messages(self, user_content: str) -> List[Dict[str, str]]: def build_messages(self, user_content: str, system_content: str = None) -> List[Dict[str, str]]:
return [ return [
{"role": "system", "content": self.system_prompt}, {"role": "system", "content": system_content or self.system_prompt},
{"role": "user", "content": user_content}, {"role": "user", "content": user_content},
] ]
......
...@@ -2,7 +2,7 @@ from __future__ import annotations ...@@ -2,7 +2,7 @@ from __future__ import annotations
import json import json
import re import re
from typing import Dict, List, Optional from typing import Dict, List, Literal, Optional
from core.tool import tool, tool_func from core.tool import tool, tool_func
from core.tools.segment_llm import LLMTool from core.tools.segment_llm import LLMTool
...@@ -47,6 +47,46 @@ ROUTER_USER_PROMPT = """ ...@@ -47,6 +47,46 @@ ROUTER_USER_PROMPT = """
""" """
# System prompt for the summary-item routing call. It tells the model to pick,
# from candidate summary-item names only, the items the current segment should
# extract (recall-first), and to reply with strict JSON, returning
# {"selected_items": []} when nothing applies.
SUMMARY_ROUTER_SYSTEM_PROMPT = """
你是合同分段摘要项路由智能体(SegmentSummaryRouter)。
你的任务是:基于“当前分段文本”,从候选摘要项名称中选出当前分段应提取的摘要项。
【路由目标】
- 仅做摘要项适配判断,不输出事实摘要、不输出风险结论、不输出审查建议。
- 候选摘要项只有名称,没有规则正文、触发词或其他辅助信息。
- 高召回优先:只要当前分段明显包含某个摘要项所需的信息,就应路由命中。
- 若候选摘要项明显无关,则不要命中。
【判断依据】
- 以当前分段文本为主。
- 可参考上下文记忆辅助理解术语,但不得脱离当前分段文本做臆断。
【输出约束】
- 严格输出 JSON。
- 摘要项路由只输出命中的摘要项名称,不输出其他信息。
- 若确实没有任何相关摘要项,返回 {"selected_items": []}。
"""
# User prompt template for summary-item routing. It is filled with str.format
# using the placeholders segment_text, context_memories_json, party_role and
# candidate_summaries_json, so any literal brace added here must be doubled.
SUMMARY_ROUTER_USER_PROMPT = """
【当前分段文本】
{segment_text}
【上下文记忆】
{context_memories_json}
【合同立场】
{party_role}
【候选摘要项名称】
{candidate_summaries_json}
【任务】
请从候选摘要项名称中选择当前分段应提取的摘要项,并输出 selected_items。
"""
ROUTER_OUTPUT_SCHEMA = """ ROUTER_OUTPUT_SCHEMA = """
```json ```json
{ {
...@@ -61,6 +101,19 @@ ROUTER_OUTPUT_SCHEMA = """ ...@@ -61,6 +101,19 @@ ROUTER_OUTPUT_SCHEMA = """
""" """
# Reply shape requested from the model for summary-item routing: an object
# whose "selected_items" is a list of {"name": ...} entries. It is concatenated
# to the already-formatted user prompt, so its braces are never passed through
# str.format.
SUMMARY_ROUTER_OUTPUT_SCHEMA = """
```json
{
"selected_items": [
{
"name": "摘要项名称"
}
]
}
```
"""
@tool("segment_rule_router", "分段规则路由") @tool("segment_rule_router", "分段规则路由")
class SegmentRuleRouterTool(LLMTool): class SegmentRuleRouterTool(LLMTool):
def __init__(self) -> None: def __init__(self) -> None:
...@@ -75,6 +128,7 @@ class SegmentRuleRouterTool(LLMTool): ...@@ -75,6 +128,7 @@ class SegmentRuleRouterTool(LLMTool):
"rules": {"type": "array", "items": {"type": "object"}}, "rules": {"type": "array", "items": {"type": "object"}},
"party_role": {"type": "string"}, "party_role": {"type": "string"},
"context_memories": {"type": "array"}, "context_memories": {"type": "array"},
"route_by": {"type": "string"},
}, },
"required": ["segment_id", "segment_text", "rules", "party_role"], "required": ["segment_id", "segment_text", "rules", "party_role"],
} }
...@@ -86,8 +140,24 @@ class SegmentRuleRouterTool(LLMTool): ...@@ -86,8 +140,24 @@ class SegmentRuleRouterTool(LLMTool):
rules: List[Dict], rules: List[Dict],
party_role: str, party_role: str,
context_memories: Optional[List[Dict]] = None, context_memories: Optional[List[Dict]] = None,
route_by: Literal["rule", "summary"] = "rule",
) -> Dict: ) -> Dict:
rules = rules or [] rules = rules or []
if route_by == "summary":
routed_summary_names = self._route_summaries(
segment_text=segment_text,
rules=rules,
party_role=party_role,
context_memories=context_memories,
)
return {
"segment_id": segment_id,
"route_by": route_by,
"routed_rules": [],
"routed_rule_titles": [],
"routed_summary_names": routed_summary_names,
}
routed_rules = self._route_rules( routed_rules = self._route_rules(
segment_text=segment_text, segment_text=segment_text,
rules=rules, rules=rules,
...@@ -96,8 +166,10 @@ class SegmentRuleRouterTool(LLMTool): ...@@ -96,8 +166,10 @@ class SegmentRuleRouterTool(LLMTool):
) )
return { return {
"segment_id": segment_id, "segment_id": segment_id,
"route_by": route_by,
"routed_rules": routed_rules, "routed_rules": routed_rules,
"routed_rule_titles": [r.get("title", "") for r in routed_rules], "routed_rule_titles": [r.get("title", "") for r in routed_rules],
"routed_summary_names": [],
} }
def _build_candidate_rules(self, rules: List[Dict]) -> List[Dict]: def _build_candidate_rules(self, rules: List[Dict]) -> List[Dict]:
...@@ -105,6 +177,17 @@ class SegmentRuleRouterTool(LLMTool): ...@@ -105,6 +177,17 @@ class SegmentRuleRouterTool(LLMTool):
{r.get("title", ""): r.get("rule", "")} for r in rules if r.get("title") {r.get("title", ""): r.get("rule", "")} for r in rules if r.get("title")
] ]
def _build_candidate_summaries(self, rules: List[Dict]) -> List[str]:
summaries: List[str] = []
seen: set[str] = set()
for rule in rules:
summary = str(rule.get("summary", "")).strip()
if not summary or summary in seen:
continue
summaries.append(summary)
seen.add(summary)
return summaries
def _route_rules( def _route_rules(
self, self,
segment_text: str, segment_text: str,
...@@ -183,6 +266,66 @@ class SegmentRuleRouterTool(LLMTool): ...@@ -183,6 +266,66 @@ class SegmentRuleRouterTool(LLMTool):
) )
return routed_rules return routed_rules
def _route_summaries(
    self,
    segment_text: str,
    rules: List[Dict],
    party_role: str,
    context_memories: Optional[List[Dict]],
) -> List[str]:
    """Select the summary-item names that apply to the current segment.

    The candidate names come from ``_build_candidate_summaries(rules)``. The
    result is the union of (a) names the LLM selected and (b) names that
    occur verbatim in ``segment_text``, restricted to the candidates and
    returned in candidate order.

    Args:
        segment_text: Text of the segment being routed.
        rules: Rule dicts from which candidate summary names are derived.
        party_role: Contract standpoint inserted into the prompt.
        context_memories: Optional context entries, JSON-encoded into the
            prompt; ``None`` is sent as an empty list.

    Returns:
        Matching candidate names, or ``[]`` when there are no candidates.
    """
    candidates = self._build_candidate_summaries(rules)
    if not candidates:
        return []

    user_content = (
        SUMMARY_ROUTER_USER_PROMPT.format(
            segment_text=segment_text,
            context_memories_json=json.dumps(
                context_memories or [], ensure_ascii=False
            ),
            party_role=party_role,
            candidate_summaries_json=json.dumps(candidates, ensure_ascii=False),
        )
        + SUMMARY_ROUTER_OUTPUT_SCHEMA
    )
    messages = [
        {"role": "system", "content": SUMMARY_ROUTER_SYSTEM_PROMPT},
        {"role": "user", "content": user_content},
    ]

    # Best-effort LLM call: any failure (transport, parsing) falls back to
    # verbatim name matching below instead of failing the whole routing.
    try:
        resp = self.run_with_loop(self.chat_async(messages))
        data = self.parse_first_json(resp)
    except Exception:
        data = None

    # The reply is untrusted: accept only a dict carrying a list. A single
    # bare string/object is wrapped so it is not iterated piecewise.
    raw_items = data.get("selected_items") if isinstance(data, dict) else None
    if isinstance(raw_items, (str, dict)):
        raw_items = [raw_items]
    elif not isinstance(raw_items, list):
        raw_items = []

    selected_names = set()
    for item in raw_items:
        # Skip malformed entries (None, numbers, nested lists) rather than
        # letting them raise outside the guarded LLM call.
        if not isinstance(item, (str, dict)):
            continue
        name = self._selected_item_name(item)
        if name:
            selected_names.add(name)

    text = segment_text or ""
    direct_matched_names = {name for name in candidates if name and name in text}

    merged_names = selected_names | direct_matched_names
    # Filtering through `candidates` drops names the model invented and keeps
    # a stable order.
    return [name for name in candidates if name in merged_names]
def _selected_item_name(self, item: Dict | str) -> str:
if isinstance(item, str):
return item.strip()
return str(
item.get("name")
or item.get("summary")
or item.get("summary_name")
or item.get("title")
or ""
).strip()
def _match_trigger_titles(self, segment_text: str, rules: List[Dict]) -> set[str]: def _match_trigger_titles(self, segment_text: str, rules: List[Dict]) -> set[str]:
text = segment_text or "" text = segment_text or ""
matched_titles: set[str] = set() matched_titles: set[str] = set()
......
No preview for this file type
from datetime import datetime from datetime import datetime
from pathlib import Path from pathlib import Path
from typing import Any, Dict, List, Optional from typing import Any, Dict, List, Literal, Optional
from uuid import uuid4 from uuid import uuid4
import ast import ast
...@@ -121,6 +121,7 @@ class SegmentSummaryRequest(BaseModel): ...@@ -121,6 +121,7 @@ class SegmentSummaryRequest(BaseModel):
party_role: Optional[str] = "" party_role: Optional[str] = ""
ruleset_id: Optional[str] = "通用" ruleset_id: Optional[str] = "通用"
routed_rule_titles: Optional[List[str]] = None routed_rule_titles: Optional[List[str]] = None
routed_summary_names: Optional[List[str]] = None
file_ext: str file_ext: str
context_facts: Optional[Dict] = None context_facts: Optional[Dict] = None
...@@ -151,10 +152,23 @@ def summarize_facts(payload: SegmentSummaryRequest) -> SegmentSummaryResponse: ...@@ -151,10 +152,23 @@ def summarize_facts(payload: SegmentSummaryRequest) -> SegmentSummaryResponse:
) )
ruleset_id = payload.ruleset_id or reference_tool.default_ruleset_id ruleset_id = payload.ruleset_id or reference_tool.default_ruleset_id
rules = reference_tool.run( if payload.routed_summary_names is not None:
ruleset_id=ruleset_id, summary_names = {
routed_rule_titles=payload.routed_rule_titles, name.strip()
).get("rules", []) for name in payload.routed_summary_names
if isinstance(name, str) and name.strip()
}
all_rules = reference_tool.run(ruleset_id=ruleset_id).get("rules", [])
rules = [
rule
for rule in all_rules
if str(rule.get("summary", "")).strip() in summary_names
]
else:
rules = reference_tool.run(
ruleset_id=ruleset_id,
routed_rule_titles=payload.routed_rule_titles,
).get("rules", [])
result = summary_tool.run( result = summary_tool.run(
segment_id=segment_idx, segment_id=segment_idx,
segment_text=segment_text, segment_text=segment_text,
...@@ -182,6 +196,9 @@ class SegmentReviewRequest(BaseModel): ...@@ -182,6 +196,9 @@ class SegmentReviewRequest(BaseModel):
routed_rule_titles: Optional[List[str]] = None routed_rule_titles: Optional[List[str]] = None
file_ext: str file_ext: str
context_memories: Optional[List[Dict]] = None context_memories: Optional[List[Dict]] = None
route_by: Literal["rule", "summary"] = Field(
default="rule", description="路由依据:rule=审查规则项,summary=摘要项"
)
class SegmentReviewResponse(BaseModel): class SegmentReviewResponse(BaseModel):
...@@ -195,7 +212,9 @@ class SegmentRuleRouterResponse(BaseModel): ...@@ -195,7 +212,9 @@ class SegmentRuleRouterResponse(BaseModel):
conversation_id: str conversation_id: str
segment_id: int segment_id: int
ruleset_id: str ruleset_id: str
route_by: Literal["rule", "summary"] = "rule"
routed_rule_titles: List[str] routed_rule_titles: List[str]
routed_summary_names: List[str] = Field(default_factory=list)
routed_rules: List[Dict] routed_rules: List[Dict]
...@@ -311,13 +330,16 @@ def route_segment_rules(payload: SegmentReviewRequest) -> SegmentRuleRouterRespo ...@@ -311,13 +330,16 @@ def route_segment_rules(payload: SegmentReviewRequest) -> SegmentRuleRouterRespo
rules=rules, rules=rules,
party_role=payload.party_role or "", party_role=payload.party_role or "",
context_memories=payload.context_memories, context_memories=payload.context_memories,
route_by=payload.route_by,
) )
return SegmentRuleRouterResponse( return SegmentRuleRouterResponse(
conversation_id=payload.conversation_id, conversation_id=payload.conversation_id,
segment_id=payload.segment_id, segment_id=payload.segment_id,
ruleset_id=ruleset_id, ruleset_id=ruleset_id,
route_by=result.get("route_by", payload.route_by),
routed_rule_titles=result.get("routed_rule_titles", []), routed_rule_titles=result.get("routed_rule_titles", []),
routed_summary_names=result.get("routed_summary_names", []),
routed_rules=result.get("routed_rules", []), routed_rules=result.get("routed_rules", []),
) )
......
...@@ -10,8 +10,7 @@ from utils.http_util import url_replace_fastgpt, download_file ...@@ -10,8 +10,7 @@ from utils.http_util import url_replace_fastgpt, download_file
from utils.common_util import random_str from utils.common_util import random_str
from loguru import logger from loguru import logger
import json import json
from core.config import ocr_url
ocr_url = 'http://192.168.252.71:8202/openapi/ocrUploadFile'
class OCRUtil: class OCRUtil:
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment