Commit 68c08496 by ccran

feat: add reflection; add multi-contract (ruleset) support

parent 9fa3fd96
@@ -262,8 +262,8 @@ class MemoryStore:
         with self._lock:
             wb = Workbook()
-            ws_findings = wb.active
-            ws_findings.title = "findings"
+            ws_final_findings = wb.active
+            ws_final_findings.title = "findings"
             finding_headers = [
                 ("rule_title", "规则标题"),
@@ -273,19 +273,19 @@ class MemoryStore:
                 ("risk_level", "风险等级"),
                 ("suggestion", "建议"),
             ]
-            ws_findings.append([label for _, label in finding_headers])
-            for f in self.findings:
-                ws_findings.append([
-                    getattr(f, key, "") for key, _ in finding_headers
-                ])
-            ws_final_findings = wb.create_sheet("final_findings")
             ws_final_findings.append([label for _, label in finding_headers])
             for f in self.final_findings:
                 ws_final_findings.append([
                     getattr(f, key, "") for key, _ in finding_headers
                 ])
+            ws_findings = wb.create_sheet("findings")
+            ws_findings.append([label for _, label in finding_headers])
+            for f in self.findings:
+                ws_findings.append([
+                    getattr(f, key, "") for key, _ in finding_headers
+                ])
             ws_facts = wb.create_sheet("facts")
             if self.facts:
                 fact_keys: List[str] = sorted({k for item in self.facts for k in item.keys()})
@@ -321,6 +321,7 @@ class MemoryStore:
         doc_obj: DocBase,
         file_name: Optional[str] = None,
         remove_prefix: bool = False,
+        export_final: bool = False,
     ) -> Dict[str, Any]:
         """Add all findings as comments to a document, upload, then delete the local file."""
         if doc_obj is None:
@@ -334,11 +335,14 @@ class MemoryStore:
             name = f"{name}{suffix}"
         output_path = Path(__file__).resolve().parent.parent / "tmp" / name
+        if export_final:
+            target_findings = self.final_findings
+        else:
+            target_findings = self.findings
         with self._lock:
             comments: List[Dict[str, Any]] = []
-            for idx, f in enumerate(self.findings, start=1):
-                # 导出final_findings
-                # for idx, f in enumerate(self.final_findings, start=1):
+            for idx, f in enumerate(target_findings, start=1):
                 segment_id = int(f.segment_id or 0)
                 chunk_id = max(segment_id - 1, 0)
                 suggest_parts = []
......
@@ -8,21 +8,12 @@ from core.tools.segment_llm import LLMTool
 REFLECT_SYSTEM_PROMPT = '''
 你是一个合同审查反思智能体(ReviewReflection)。
-你的任务是:基于 facts 与全文上下文,
-对已有 findings 输出修改操作。
-你只能对 findings 执行以下四种操作:
-- keep:确认该风险结论成立且无需修改
-- update:修改一个已有风险
-- add:新增一个由全文结构推导出的风险
-- remove:删除一个不成立的风险
-【输出约束】
-- 输出必须是 JSON
-- 每条操作必须仅包含字段:op、id、findings
-- findings 在 add / update 时必须是完整 finding
+你要基于 facts 与全文上下文,对已有 findings 进行校正后,输出【最终可交付的 findings 列表】。
+要求:
 - 严格按照输出 JSON Schema 返回结果,不得输出任何解释性文字
+- 最终 findings 中每条都必须证据充分,original_text 必须是合同原文直接引用
+- 不得引入新的审查维度,只能基于已有 findings 的范围做合并、修订、删除或系统性总结
 '''

 REFLECT_USER_PROMPT = '''
@@ -36,28 +27,23 @@ REFLECT_USER_PROMPT = '''
 【合同立场】
 站在 {party_role} 的立场进行反思审查。
-【反思原则】
-- 不得引入新的审查维度
-- 所有判断必须有 facts 或合同原文证据支持
-- 仅对已有 findings 进行增add、改update、删remove、保留keep操作
-- 若风险在全文上下文中不成立,必须 remove
-- 若风险成立但表述、严重性或建议不准确,必须 update
-- 若由多个 findings 或全文结构推导出新的系统性风险,可 add
-【输出要求】
-- 输出必须是 JSON
-- 每条操作必须仅包含字段:op、id、findings
-- findings 在 add / update 时必须是完整 finding
+【任务】
+输出反思后的最终 findings 列表(可直接用于最终审查报告):
+- 删除在全文上下文中不成立的 findings
+- 修订表述/严重性/建议不准确的 findings
+- 如需合并重复 findings,请合并成一条(保留全部原文证据引用)
+- 如可由全文结构推导出系统性风险,可新增 1~3 条 global findings(仍需原文证据)
 - 严格按照输出 JSON Schema 返回结果,不得输出任何解释性文字
 '''

 OUTPUT_FORMAT_SCHEMA = '''
 ```json
 {
-  "operations": [
+  "final_findings": [
     {
-      "op": "keep | add | update | remove",
-      "id": "string | null",
-      "findings": "object | null"
+      "segment_id": "合同原文片段所在的段落ID",
+      "issue": "详细的风险描述",
+      "original_text": "合同原文片段的直接引用",
+      "suggestion": "可直接替换原文或新增的条款措辞"
     }
   ]
 }
@@ -98,9 +84,13 @@ class ReflectRetryTool(LLMTool):
         except Exception:
             data = {}
-        operations = data.get("operations", []) or []
-        final_findings = self._apply_operations(base_findings, operations)
+        final_findings = data.get("final_findings", []) or []
+        for finding in final_findings:
+            try:
+                finding['segment_id'] = int(finding.get('segment_id', 0))
+            except Exception:
+                finding['segment_id'] = 0
+            finding['rule_title'] = rule.get('title', '')
         return final_findings

     def _build_findings_with_ids(self, findings: List[Dict]) -> List[Dict[str, Any]]:
@@ -112,33 +102,6 @@ class ReflectRetryTool(LLMTool):
             res.append(item)
         return res

-    def _apply_operations(self, base_findings: List[Dict[str, Any]], operations: List[Dict]) -> List[Dict[str, Any]]:
-        by_id = {str(f.get("id")): dict(f) for f in base_findings if f.get("id") is not None}
-        added: List[Dict[str, Any]] = []
-        for op in operations:
-            action = (op.get("op") or "").strip().lower()
-            target_id = op.get("id")
-            payload = op.get("findings")
-            if action == "keep":
-                if target_id is not None and str(target_id) in by_id:
-                    continue
-            elif action == "remove":
-                if target_id is not None:
-                    by_id.pop(str(target_id), None)
-            elif action == "update":
-                if target_id is not None and isinstance(payload, dict):
-                    payload = dict(payload)
-                    payload["id"] = str(target_id)
-                    by_id[str(target_id)] = payload
-            elif action == "add":
-                if isinstance(payload, dict):
-                    added.append(dict(payload))
-        merged = list(by_id.values()) + added
-        return merged
-
 if __name__ == "__main__":
     tool = ReflectRetryTool()
......
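For reference, a minimal sketch of the reply shape the rewritten reflection prompt now expects from the model. The field names mirror OUTPUT_FORMAT_SCHEMA above; the contract text and suggestion values are invented purely for illustration.

```python
# Illustrative only: an example model reply matching the new final_findings schema.
# Field names come from OUTPUT_FORMAT_SCHEMA; the concrete values are made up.
example_reply = {
    "final_findings": [
        {
            "segment_id": "12",  # coerced back to int by ReflectRetryTool after parsing
            "issue": "违约金比例过高,存在被法院调减的风险",
            "original_text": "乙方逾期付款的,应按合同总价款的30%支付违约金。",
            "suggestion": "建议将违约金调整为按逾期金额每日万分之五计算,并约定违约金上限。",
        }
    ]
}

# After parsing, the tool normalizes each entry: segment_id becomes an int
# (falling back to 0) and rule_title is attached from the current rule.
```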
@@ -10,6 +10,9 @@ from utils.excel_util import ExcelUtil
 from core.tools.segment_llm import LLMTool
 import re

+DEFAULT_RULESET_ID = "通用"
+ALL_RULESET_IDS = ["通用", "借款", "担保", "测试"]
+
 REVIEW_SYSTEM_PROMPT = '''
 你是一个专业的合同分段审查智能体(SegmentReview)。
 你的核心任务是对“当前分段”进行【法律风险识别】,并给出可落地的修改建议。
@@ -103,7 +106,7 @@ def _is_generic_suggestion(text: str) -> bool:
 class SegmentReviewTool(LLMTool):
     def __init__(self):
         super().__init__(REVIEW_SYSTEM_PROMPT)
-        self.rule_version = "通用"
+        self.default_ruleset_id = DEFAULT_RULESET_ID
         self.column_map = {
             "id": "ID",
             "title": "审查项",
@@ -113,8 +116,10 @@ class SegmentReviewTool(LLMTool):
             "suggestion_template": "建议模板",
         }
         rules_path = Path(__file__).resolve().parent.parent.parent / "data" / "rules.xlsx"
-        rules = ExcelUtil.load_mapped_excel(rules_path, sheet_name=self.rule_version, column_map=self.column_map)
-        self.rulesets: Dict[str, List[Dict]] = {self.rule_version: rules} if rules else {}
+        self.rulesets: Dict[str, List[Dict]] = {}
+        for rs_id in ALL_RULESET_IDS:
+            rules = ExcelUtil.load_mapped_excel(rules_path, sheet_name=rs_id, column_map=self.column_map)
+            self.rulesets[rs_id] = rules

     @tool_func(
         {
@@ -130,7 +135,7 @@ class SegmentReviewTool(LLMTool):
         }
     )
     def run(self, segment_id: str, segment_text: str, ruleset_id: str, party_role: str, context_memories: Optional[List[Dict]] = None) -> Dict:
-        rules = self.rulesets.get(ruleset_id) or self.rulesets.get(self.rule_version, []) or []
+        rules = self.rulesets.get(ruleset_id) or self.rulesets.get(self.default_ruleset_id, []) or []
         result = self._evaluate_rules(party_role,segment_id,segment_text,rules, context_memories)
         overall = "revise" if (result["findings"] ) else "pass"
......
No preview for this file type
@@ -67,7 +67,7 @@ async def parse_document(payload: DocumentParseRequest) -> DocumentParseResponse
     # text = doc_obj.get_all_text()
     chunk_ids = doc_obj.get_chunk_id_list()
     # get ruleset items
-    ruleset_id = payload.ruleset_id or review_tool.rule_version
+    ruleset_id = payload.ruleset_id or review_tool.default_ruleset_id
     ruleset_items = review_tool.rulesets.get(ruleset_id) or []
     ruleset_review_items = [r.get('title') for r in ruleset_items]
     return DocumentParseResponse(
@@ -198,15 +198,13 @@ class ReflectReviewResponse(BaseModel):
 @app.post("/segments/review/reflect", response_model=ReflectReviewResponse)
 def reflect_review(payload: ReflectReviewRequest) -> ReflectReviewResponse:
     store = get_cached_memory(payload.conversation_id)
-    ruleset_id = payload.ruleset_id or review_tool.rule_version
+    ruleset_id = payload.ruleset_id or review_tool.default_ruleset_id
     ruleset_items = review_tool.rulesets.get(ruleset_id) or []
     rule = next((r for r in ruleset_items if r.get("title") == payload.rule_title), None)
     if not rule:
         raise HTTPException(status_code=404, detail=f"Rule not found: {payload.rule_title}")
     facts = store.get_facts()
-    findings = [f.__dict__ for f in store.list_findings()]
+    findings = [f.__dict__ for f in store.search_findings("", rule_title=payload.rule_title)]
     final_findings = reflect_tool.run(
         party_role=payload.party_role,
         rule=rule,
@@ -221,12 +219,12 @@ def reflect_review(payload: ReflectReviewRequest) -> ReflectReviewResponse:
                 "segment_id": f.get("segment_id", 0),
                 "original_text": f.get("original_text", ""),
                 "issue": f.get("issue", ""),
-                "risk_level": (f.get("risk_level") or f.get("level") or "").upper(),
+                "risk_level": (f.get("risk_level") or f.get("level") or "H").upper(),
                 "suggestion": f.get("suggestion", ""),
             })
+            # print(f'len(store) final_findings:{len(store.final_findings)}')
         except Exception:
             continue
     return ReflectReviewResponse(
         conversation_id=payload.conversation_id,
         rule_title=payload.rule_title,
@@ -250,6 +248,7 @@ class MemoryExportRequest(BaseModel):
     conversation_id: str
     file_ext: str
     file_name: Optional[str] = None
+    export_final: Optional[bool] = False


 class MemoryExportResponse(BaseModel):
@@ -273,7 +272,7 @@ def export_memory(payload: MemoryExportRequest) -> MemoryExportResponse:
         raise HTTPException(status_code=500, detail=f"Export failed: {exc}")
     try:
-        doc_res = store.export_findings_to_doc_comments(doc_obj)
+        doc_res = store.export_findings_to_doc_comments(doc_obj, export_final=payload.export_final or False)
     except Exception as exc:
         traceback.print_exc()
         raise HTTPException(status_code=500, detail=f"Export doc comments failed: {exc}")
......
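A quick sketch of how a client might exercise the new export_final flag. The request fields come from MemoryExportRequest above; the endpoint path and host are assumptions, since the route decorator for export_memory is outside this diff.

```python
# Minimal sketch, assuming the export route is mounted at /memory/export on localhost.
# conversation_id, file_ext, file_name and export_final mirror MemoryExportRequest.
import requests

payload = {
    "conversation_id": "demo-conversation",
    "file_ext": ".docx",
    "file_name": "contract_review",
    "export_final": True,  # export reflected final_findings instead of raw findings
}
resp = requests.post("http://localhost:8000/memory/export", json=payload, timeout=60)
resp.raise_for_status()
print(resp.json())
```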