Commit 58af8ced by ccran

feat: update rule;

parent a79604f2
...@@ -18,6 +18,7 @@ MAX_SINGLE_CHUNK_SIZE = 5000 ...@@ -18,6 +18,7 @@ MAX_SINGLE_CHUNK_SIZE = 5000
META_KEY = "META" META_KEY = "META"
DEFAULT_RULESET_ID = "通用" DEFAULT_RULESET_ID = "通用"
ALL_RULESET_IDS = ["通用", "借款", "担保", "财务口", "金盘", "金盘简化", "麓发测试"] ALL_RULESET_IDS = ["通用", "借款", "担保", "财务口", "金盘", "金盘简化", "麓发测试"]
MAX_WORKERS = 10
use_lufa = False use_lufa = False
if use_lufa: if use_lufa:
...@@ -52,7 +53,6 @@ elif system == "Darwin": ...@@ -52,7 +53,6 @@ elif system == "Darwin":
# docker设置 # docker设置
if use_docker: if use_docker:
root_path = "/app" root_path = "/app"
MAX_WORKERS = 20
LLM = { LLM = {
"base_tool_llm": LLMConfig(), "base_tool_llm": LLMConfig(),
"fastgpt_segment_review": LLMConfig( "fastgpt_segment_review": LLMConfig(
......
...@@ -8,7 +8,7 @@ from core.tool import tool, tool_func ...@@ -8,7 +8,7 @@ from core.tool import tool, tool_func
from core.tools.segment_llm import LLMTool from core.tools.segment_llm import LLMTool
ROUTER_SYSTEM_PROMPT = ''' ROUTER_SYSTEM_PROMPT = """
你是合同分段规则路由智能体(SegmentRuleRouter)。 你是合同分段规则路由智能体(SegmentRuleRouter)。
你的任务是:基于“当前分段文本”,从候选审查规则中选出“应执行审查”的规则项。 你的任务是:基于“当前分段文本”,从候选审查规则中选出“应执行审查”的规则项。
...@@ -26,10 +26,10 @@ ROUTER_SYSTEM_PROMPT = ''' ...@@ -26,10 +26,10 @@ ROUTER_SYSTEM_PROMPT = '''
- 严格输出 JSON。 - 严格输出 JSON。
- 每个命中规则需给出简短 reason,说明该分段为何与规则相关。 - 每个命中规则需给出简短 reason,说明该分段为何与规则相关。
- 若确实没有任何相关规则,返回 {"selected_items": []}。 - 若确实没有任何相关规则,返回 {"selected_items": []}。
''' """
ROUTER_USER_PROMPT = ''' ROUTER_USER_PROMPT = """
【当前分段文本】 【当前分段文本】
{segment_text} {segment_text}
...@@ -44,10 +44,10 @@ ROUTER_USER_PROMPT = ''' ...@@ -44,10 +44,10 @@ ROUTER_USER_PROMPT = '''
【任务】 【任务】
请从候选规则中选择当前分段应执行的审查项,并输出 selected_items。 请从候选规则中选择当前分段应执行的审查项,并输出 selected_items。
''' """
ROUTER_OUTPUT_SCHEMA = ''' ROUTER_OUTPUT_SCHEMA = """
```json ```json
{ {
"selected_items": [ "selected_items": [
...@@ -58,7 +58,7 @@ ROUTER_OUTPUT_SCHEMA = ''' ...@@ -58,7 +58,7 @@ ROUTER_OUTPUT_SCHEMA = '''
] ]
} }
``` ```
''' """
@tool("segment_rule_router", "分段规则路由") @tool("segment_rule_router", "分段规则路由")
...@@ -102,11 +102,7 @@ class SegmentRuleRouterTool(LLMTool): ...@@ -102,11 +102,7 @@ class SegmentRuleRouterTool(LLMTool):
def _build_candidate_rules(self, rules: List[Dict]) -> List[Dict]: def _build_candidate_rules(self, rules: List[Dict]) -> List[Dict]:
return [ return [
{ {r.get("title", ""): r.get("rule", "")} for r in rules if r.get("title")
r.get("title", ""): r.get("rule", "")
}
for r in rules
if r.get("title")
] ]
def _route_rules( def _route_rules(
...@@ -120,28 +116,41 @@ class SegmentRuleRouterTool(LLMTool): ...@@ -120,28 +116,41 @@ class SegmentRuleRouterTool(LLMTool):
return [] return []
candidates = self._build_candidate_rules(rules) candidates = self._build_candidate_rules(rules)
user_content = ROUTER_USER_PROMPT.format( user_content = (
ROUTER_USER_PROMPT.format(
segment_text=segment_text, segment_text=segment_text,
context_memories_json=json.dumps(context_memories or [], ensure_ascii=False), context_memories_json=json.dumps(
context_memories or [], ensure_ascii=False
),
party_role=party_role, party_role=party_role,
candidate_rules_json=json.dumps(candidates, ensure_ascii=False), candidate_rules_json=json.dumps(candidates, ensure_ascii=False),
) + ROUTER_OUTPUT_SCHEMA )
+ ROUTER_OUTPUT_SCHEMA
)
llm_selected: List[Dict] = [] llm_selected: List[Dict] = []
try: try:
resp = self.run_with_loop(self.chat_async(self.build_messages(user_content))) resp = self.run_with_loop(
self.chat_async(self.build_messages(user_content))
)
data = self.parse_first_json(resp) data = self.parse_first_json(resp)
llm_selected = data.get("selected_items", []) or [] llm_selected = data.get("selected_items", []) or []
except Exception: except Exception:
llm_selected = [] llm_selected = []
selected_titles = {str(item.get("title", "")).strip() for item in llm_selected if item.get("title")} selected_titles = {
str(item.get("title", "")).strip()
for item in llm_selected
if item.get("title")
}
selected_reasons = { selected_reasons = {
str(item.get("title", "")).strip(): str(item.get("reason", "")).strip() str(item.get("title", "")).strip(): str(item.get("reason", "")).strip()
for item in llm_selected for item in llm_selected
if item.get("title") if item.get("title")
} }
trigger_titles = self._match_trigger_titles(segment_text=segment_text, rules=rules) trigger_titles = self._match_trigger_titles(
segment_text=segment_text, rules=rules
)
merged_titles = selected_titles | trigger_titles merged_titles = selected_titles | trigger_titles
if not merged_titles: if not merged_titles:
......
...@@ -121,7 +121,7 @@ def _parse_args() -> argparse.Namespace: ...@@ -121,7 +121,7 @@ def _parse_args() -> argparse.Namespace:
parser.add_argument( parser.add_argument(
"--datasets-dir", "--datasets-dir",
type=Path, type=Path,
default=base / "results" / "jp-output-lufa-20260416-000112", default=base / "results" / "jp-output-lufa-20260416-235546",
help="Directory containing Word files with annotations.", help="Directory containing Word files with annotations.",
) )
parser.add_argument( parser.add_argument(
......
...@@ -4,6 +4,7 @@ from openai import AsyncOpenAI ...@@ -4,6 +4,7 @@ from openai import AsyncOpenAI
from dataclasses import dataclass from dataclasses import dataclass
from tenacity import retry, stop_after_attempt, stop_after_delay, wait_fixed from tenacity import retry, stop_after_attempt, stop_after_delay, wait_fixed
import asyncio import asyncio
from core.config import MAX_WORKERS
@dataclass @dataclass
...@@ -12,8 +13,9 @@ class LLMConfig: ...@@ -12,8 +13,9 @@ class LLMConfig:
api_key: str api_key: str
model: str model: str
class OpenAITool: class OpenAITool:
def __init__(self, llm_config: LLMConfig, max_workers: int = 5): def __init__(self, llm_config: LLMConfig, max_workers: int = MAX_WORKERS):
self.max_workers = max_workers self.max_workers = max_workers
self.llm_config = llm_config self.llm_config = llm_config
self.client = AsyncOpenAI( self.client = AsyncOpenAI(
...@@ -24,17 +26,11 @@ class OpenAITool: ...@@ -24,17 +26,11 @@ class OpenAITool:
async def chat(self, msg, tools=None): async def chat(self, msg, tools=None):
if tools is None: if tools is None:
extra_body = None extra_body = None
if msg[0]['role'] == 'system': if msg[0]["role"] == "system":
extra_body = { extra_body = {"variables": {"system": msg[0]["content"]}}
'variables': {
'system': msg[0]['content']
}
}
msg = msg[1:] msg = msg[1:]
response = await self.client.chat.completions.create( response = await self.client.chat.completions.create(
model=self.llm_config.model, model=self.llm_config.model, messages=msg, extra_body=extra_body
messages=msg,
extra_body=extra_body
) )
content = response.choices[0].message.content content = response.choices[0].message.content
reasoning_content = response.choices[0].message.model_extra.get( reasoning_content = response.choices[0].message.model_extra.get(
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment