Commit 58af8ced by ccran

feat: update rule;

parent a79604f2
......@@ -18,6 +18,7 @@ MAX_SINGLE_CHUNK_SIZE = 5000
META_KEY = "META"
DEFAULT_RULESET_ID = "通用"
ALL_RULESET_IDS = ["通用", "借款", "担保", "财务口", "金盘", "金盘简化", "麓发测试"]
MAX_WORKERS = 10
use_lufa = False
if use_lufa:
......@@ -52,7 +53,6 @@ elif system == "Darwin":
# docker设置
if use_docker:
root_path = "/app"
MAX_WORKERS = 20
LLM = {
"base_tool_llm": LLMConfig(),
"fastgpt_segment_review": LLMConfig(
......
......@@ -8,7 +8,7 @@ from core.tool import tool, tool_func
from core.tools.segment_llm import LLMTool
ROUTER_SYSTEM_PROMPT = '''
ROUTER_SYSTEM_PROMPT = """
你是合同分段规则路由智能体(SegmentRuleRouter)。
你的任务是:基于“当前分段文本”,从候选审查规则中选出“应执行审查”的规则项。
......@@ -26,10 +26,10 @@ ROUTER_SYSTEM_PROMPT = '''
- 严格输出 JSON。
- 每个命中规则需给出简短 reason,说明该分段为何与规则相关。
- 若确实没有任何相关规则,返回 {"selected_items": []}。
'''
"""
ROUTER_USER_PROMPT = '''
ROUTER_USER_PROMPT = """
【当前分段文本】
{segment_text}
......@@ -44,10 +44,10 @@ ROUTER_USER_PROMPT = '''
【任务】
请从候选规则中选择当前分段应执行的审查项,并输出 selected_items。
'''
"""
ROUTER_OUTPUT_SCHEMA = '''
ROUTER_OUTPUT_SCHEMA = """
```json
{
"selected_items": [
......@@ -58,7 +58,7 @@ ROUTER_OUTPUT_SCHEMA = '''
]
}
```
'''
"""
@tool("segment_rule_router", "分段规则路由")
......@@ -102,11 +102,7 @@ class SegmentRuleRouterTool(LLMTool):
def _build_candidate_rules(self, rules: List[Dict]) -> List[Dict]:
return [
{
r.get("title", ""): r.get("rule", "")
}
for r in rules
if r.get("title")
{r.get("title", ""): r.get("rule", "")} for r in rules if r.get("title")
]
def _route_rules(
......@@ -120,28 +116,41 @@ class SegmentRuleRouterTool(LLMTool):
return []
candidates = self._build_candidate_rules(rules)
user_content = ROUTER_USER_PROMPT.format(
user_content = (
ROUTER_USER_PROMPT.format(
segment_text=segment_text,
context_memories_json=json.dumps(context_memories or [], ensure_ascii=False),
context_memories_json=json.dumps(
context_memories or [], ensure_ascii=False
),
party_role=party_role,
candidate_rules_json=json.dumps(candidates, ensure_ascii=False),
) + ROUTER_OUTPUT_SCHEMA
)
+ ROUTER_OUTPUT_SCHEMA
)
llm_selected: List[Dict] = []
try:
resp = self.run_with_loop(self.chat_async(self.build_messages(user_content)))
resp = self.run_with_loop(
self.chat_async(self.build_messages(user_content))
)
data = self.parse_first_json(resp)
llm_selected = data.get("selected_items", []) or []
except Exception:
llm_selected = []
selected_titles = {str(item.get("title", "")).strip() for item in llm_selected if item.get("title")}
selected_titles = {
str(item.get("title", "")).strip()
for item in llm_selected
if item.get("title")
}
selected_reasons = {
str(item.get("title", "")).strip(): str(item.get("reason", "")).strip()
for item in llm_selected
if item.get("title")
}
trigger_titles = self._match_trigger_titles(segment_text=segment_text, rules=rules)
trigger_titles = self._match_trigger_titles(
segment_text=segment_text, rules=rules
)
merged_titles = selected_titles | trigger_titles
if not merged_titles:
......
......@@ -121,7 +121,7 @@ def _parse_args() -> argparse.Namespace:
parser.add_argument(
"--datasets-dir",
type=Path,
default=base / "results" / "jp-output-lufa-20260416-000112",
default=base / "results" / "jp-output-lufa-20260416-235546",
help="Directory containing Word files with annotations.",
)
parser.add_argument(
......
......@@ -4,6 +4,7 @@ from openai import AsyncOpenAI
from dataclasses import dataclass
from tenacity import retry, stop_after_attempt, stop_after_delay, wait_fixed
import asyncio
from core.config import MAX_WORKERS
@dataclass
......@@ -12,8 +13,9 @@ class LLMConfig:
api_key: str
model: str
class OpenAITool:
def __init__(self, llm_config: LLMConfig, max_workers: int = 5):
def __init__(self, llm_config: LLMConfig, max_workers: int = MAX_WORKERS):
self.max_workers = max_workers
self.llm_config = llm_config
self.client = AsyncOpenAI(
......@@ -24,17 +26,11 @@ class OpenAITool:
async def chat(self, msg, tools=None):
if tools is None:
extra_body = None
if msg[0]['role'] == 'system':
extra_body = {
'variables': {
'system': msg[0]['content']
}
}
if msg[0]["role"] == "system":
extra_body = {"variables": {"system": msg[0]["content"]}}
msg = msg[1:]
response = await self.client.chat.completions.create(
model=self.llm_config.model,
messages=msg,
extra_body=extra_body
model=self.llm_config.model, messages=msg, extra_body=extra_body
)
content = response.choices[0].message.content
reasoning_content = response.choices[0].message.model_extra.get(
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment