Skip to content
Toggle navigation
P
Projects
G
Groups
S
Snippets
Help
ccran
/
lufa-contract
This project
Loading...
Sign in
Toggle navigation
Go to a project
Project
Repository
Issues
0
Merge Requests
0
Pipelines
Wiki
Snippets
Members
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Commit
49b473ef
authored
May 21, 2026
by
ccran
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
feat: 新增摘要项;
parent
e48b2cd5
Show whitespace changes
Inline
Side-by-side
Showing
7 changed files
with
213 additions
and
8 deletions
+213
-8
core/config.py
+4
-0
core/memory.py
+39
-2
core/tools/segment_llm.py
+2
-2
core/tools/segment_rule_router.py
+144
-1
data/rules.xlsx
+0
-0
main.py
+23
-1
utils/ocr_util.py
+1
-2
No files found.
core/config.py
View file @
49b473ef
...
@@ -24,6 +24,7 @@ MERGE_RULE_PROMPT = False
...
@@ -24,6 +24,7 @@ MERGE_RULE_PROMPT = False
MAX_SINGLE_CHUNK_SIZE
=
5000
MAX_SINGLE_CHUNK_SIZE
=
5000
META_KEY
=
"META"
META_KEY
=
"META"
DEFAULT_RULESET_ID
=
"通用"
DEFAULT_RULESET_ID
=
"通用"
## 规则集ID列表,需与rules.xlsx中的sheet名称保持一致!!!
ALL_RULESET_IDS
=
[
ALL_RULESET_IDS
=
[
"通用"
,
"通用"
,
"借款"
,
"借款"
,
...
@@ -33,6 +34,7 @@ ALL_RULESET_IDS = [
...
@@ -33,6 +34,7 @@ ALL_RULESET_IDS = [
"金盘简化"
,
"金盘简化"
,
"麓发测试"
,
"麓发测试"
,
"麓发标准"
,
"麓发标准"
,
"金盘B类"
]
]
MAX_WORKERS
=
10
MAX_WORKERS
=
10
FILE_SUFFIX
=
"-审核批注"
FILE_SUFFIX
=
"-审核批注"
...
@@ -43,6 +45,7 @@ use_lufa = False
...
@@ -43,6 +45,7 @@ use_lufa = False
use_jp_machine
=
True
use_jp_machine
=
True
## 关键参数**
## 关键参数**
ocr_url
=
'http://192.168.252.71:8202/openapi/ocrUploadFile'
if
use_lufa
:
if
use_lufa
:
outer_backend_url
=
"http://znkf.lgfzgroup.com:48081"
outer_backend_url
=
"http://znkf.lgfzgroup.com:48081"
base_fastgpt_url
=
"http://192.168.252.71:18089"
base_fastgpt_url
=
"http://192.168.252.71:18089"
...
@@ -62,6 +65,7 @@ else:
...
@@ -62,6 +65,7 @@ else:
outer_backend_url
=
"http://172.21.107.45:48080"
outer_backend_url
=
"http://172.21.107.45:48080"
base_fastgpt_url
=
"http://172.21.107.45:3030"
base_fastgpt_url
=
"http://172.21.107.45:3030"
base_backend_url
=
"http://172.21.107.45:48080"
base_backend_url
=
"http://172.21.107.45:48080"
ocr_url
=
"http://172.21.107.45:8202/openapi/ocrUploadFile"
segment_review_api_key
=
(
segment_review_api_key
=
(
"fastgpt-vLu2JHAfqwEq5FUQhvATFDK0yDS6fs804v7KwWBMyU4sRrHzh4UGl89Zpa"
"fastgpt-vLu2JHAfqwEq5FUQhvATFDK0yDS6fs804v7KwWBMyU4sRrHzh4UGl89Zpa"
)
)
...
...
core/memory.py
View file @
49b473ef
...
@@ -347,13 +347,13 @@ class MemoryStore:
...
@@ -347,13 +347,13 @@ class MemoryStore:
ws_facts
.
append
([
"元信息"
,
"事实内容"
])
ws_facts
.
append
([
"元信息"
,
"事实内容"
])
for
item
in
self
.
facts
:
for
item
in
self
.
facts
:
if
not
isinstance
(
item
,
dict
):
if
not
isinstance
(
item
,
dict
):
ws_facts
.
append
([
"事实"
,
json
.
dumps
(
item
,
ensure_ascii
=
False
)])
ws_facts
.
append
([
"事实"
,
self
.
_format_summary_for_export
(
item
)])
continue
continue
meta_info
=
item
.
get
(
META_KEY
,
None
)
meta_info
=
item
.
get
(
META_KEY
,
None
)
ws_facts
.
append
(
ws_facts
.
append
(
[
[
json
.
dumps
(
meta_info
,
ensure_ascii
=
False
),
json
.
dumps
(
meta_info
,
ensure_ascii
=
False
),
json
.
dumps
(
item
,
ensure_ascii
=
False
),
self
.
_format_summary_for_export
(
item
),
]
]
)
)
else
:
else
:
...
@@ -443,6 +443,43 @@ class MemoryStore:
...
@@ -443,6 +443,43 @@ class MemoryStore:
return
safe
[:
31
]
return
safe
[:
31
]
@staticmethod
@staticmethod
def
_format_summary_for_export
(
value
:
Any
,
level
:
int
=
0
)
->
str
:
indent
=
" "
*
level
if
isinstance
(
value
,
dict
):
lines
:
List
[
str
]
=
[]
for
key
,
child
in
value
.
items
():
if
key
==
META_KEY
:
continue
key_text
=
str
(
key
)
if
isinstance
(
child
,
(
dict
,
list
)):
lines
.
append
(
f
"{indent}{key_text}:"
)
child_text
=
MemoryStore
.
_format_summary_for_export
(
child
,
level
+
1
)
if
child_text
:
lines
.
append
(
child_text
)
else
:
lines
.
append
(
f
"{indent}{key_text}:{MemoryStore._format_scalar(child)}"
)
return
"
\n
"
.
join
(
lines
)
if
isinstance
(
value
,
list
):
lines
=
[]
for
item
in
value
:
if
isinstance
(
item
,
(
dict
,
list
)):
item_text
=
MemoryStore
.
_format_summary_for_export
(
item
,
level
)
if
item_text
:
lines
.
append
(
item_text
)
else
:
lines
.
append
(
f
"{indent}{MemoryStore._format_scalar(item)}"
)
return
"
\n
"
.
join
(
lines
)
return
f
"{indent}{MemoryStore._format_scalar(value)}"
@staticmethod
def
_format_scalar
(
value
:
Any
)
->
str
:
if
value
is
None
:
return
""
return
str
(
value
)
@staticmethod
def
_normalize_finding_key
(
key
:
str
)
->
str
:
def
_normalize_finding_key
(
key
:
str
)
->
str
:
normalized
=
(
key
or
""
)
.
strip
()
.
lower
()
normalized
=
(
key
or
""
)
.
strip
()
.
lower
()
if
not
normalized
:
if
not
normalized
:
...
...
core/tools/segment_llm.py
View file @
49b473ef
...
@@ -19,9 +19,9 @@ class LLMTool(ToolBase):
...
@@ -19,9 +19,9 @@ class LLMTool(ToolBase):
self
.
system_prompt
=
system_prompt
self
.
system_prompt
=
system_prompt
self
.
llm
=
OpenAITool
(
LLM
[
llm_key
],
max_workers
=
MAX_WORKERS
)
self
.
llm
=
OpenAITool
(
LLM
[
llm_key
],
max_workers
=
MAX_WORKERS
)
def
build_messages
(
self
,
user_content
:
str
)
->
List
[
Dict
[
str
,
str
]]:
def
build_messages
(
self
,
user_content
:
str
,
system_content
:
str
=
None
)
->
List
[
Dict
[
str
,
str
]]:
return
[
return
[
{
"role"
:
"system"
,
"content"
:
self
.
system_prompt
},
{
"role"
:
"system"
,
"content"
:
s
ystem_content
or
s
elf
.
system_prompt
},
{
"role"
:
"user"
,
"content"
:
user_content
},
{
"role"
:
"user"
,
"content"
:
user_content
},
]
]
...
...
core/tools/segment_rule_router.py
View file @
49b473ef
...
@@ -2,7 +2,7 @@ from __future__ import annotations
...
@@ -2,7 +2,7 @@ from __future__ import annotations
import
json
import
json
import
re
import
re
from
typing
import
Dict
,
List
,
Optional
from
typing
import
Dict
,
List
,
Literal
,
Optional
from
core.tool
import
tool
,
tool_func
from
core.tool
import
tool
,
tool_func
from
core.tools.segment_llm
import
LLMTool
from
core.tools.segment_llm
import
LLMTool
...
@@ -47,6 +47,46 @@ ROUTER_USER_PROMPT = """
...
@@ -47,6 +47,46 @@ ROUTER_USER_PROMPT = """
"""
"""
SUMMARY_ROUTER_SYSTEM_PROMPT
=
"""
你是合同分段摘要项路由智能体(SegmentSummaryRouter)。
你的任务是:基于“当前分段文本”,从候选摘要项名称中选出当前分段应提取的摘要项。
【路由目标】
- 仅做摘要项适配判断,不输出事实摘要、不输出风险结论、不输出审查建议。
- 候选摘要项只有名称,没有规则正文、触发词或其他辅助信息。
- 高召回优先:只要当前分段明显包含某个摘要项所需的信息,就应路由命中。
- 若候选摘要项明显无关,则不要命中。
【判断依据】
- 以当前分段文本为主。
- 可参考上下文记忆辅助理解术语,但不得脱离当前分段文本做臆断。
【输出约束】
- 严格输出 JSON。
- 摘要项路由只输出命中的摘要项名称,不输出其他信息。
- 若确实没有任何相关摘要项,返回 {"selected_items": []}。
"""
SUMMARY_ROUTER_USER_PROMPT
=
"""
【当前分段文本】
{segment_text}
【上下文记忆】
{context_memories_json}
【合同立场】
{party_role}
【候选摘要项名称】
{candidate_summaries_json}
【任务】
请从候选摘要项名称中选择当前分段应提取的摘要项,并输出 selected_items。
"""
ROUTER_OUTPUT_SCHEMA
=
"""
ROUTER_OUTPUT_SCHEMA
=
"""
```json
```json
{
{
...
@@ -61,6 +101,19 @@ ROUTER_OUTPUT_SCHEMA = """
...
@@ -61,6 +101,19 @@ ROUTER_OUTPUT_SCHEMA = """
"""
"""
SUMMARY_ROUTER_OUTPUT_SCHEMA
=
"""
```json
{
"selected_items": [
{
"name": "摘要项名称"
}
]
}
```
"""
@tool
(
"segment_rule_router"
,
"分段规则路由"
)
@tool
(
"segment_rule_router"
,
"分段规则路由"
)
class
SegmentRuleRouterTool
(
LLMTool
):
class
SegmentRuleRouterTool
(
LLMTool
):
def
__init__
(
self
)
->
None
:
def
__init__
(
self
)
->
None
:
...
@@ -75,6 +128,7 @@ class SegmentRuleRouterTool(LLMTool):
...
@@ -75,6 +128,7 @@ class SegmentRuleRouterTool(LLMTool):
"rules"
:
{
"type"
:
"array"
,
"items"
:
{
"type"
:
"object"
}},
"rules"
:
{
"type"
:
"array"
,
"items"
:
{
"type"
:
"object"
}},
"party_role"
:
{
"type"
:
"string"
},
"party_role"
:
{
"type"
:
"string"
},
"context_memories"
:
{
"type"
:
"array"
},
"context_memories"
:
{
"type"
:
"array"
},
"route_by"
:
{
"type"
:
"string"
},
},
},
"required"
:
[
"segment_id"
,
"segment_text"
,
"rules"
,
"party_role"
],
"required"
:
[
"segment_id"
,
"segment_text"
,
"rules"
,
"party_role"
],
}
}
...
@@ -86,8 +140,24 @@ class SegmentRuleRouterTool(LLMTool):
...
@@ -86,8 +140,24 @@ class SegmentRuleRouterTool(LLMTool):
rules
:
List
[
Dict
],
rules
:
List
[
Dict
],
party_role
:
str
,
party_role
:
str
,
context_memories
:
Optional
[
List
[
Dict
]]
=
None
,
context_memories
:
Optional
[
List
[
Dict
]]
=
None
,
route_by
:
Literal
[
"rule"
,
"summary"
]
=
"rule"
,
)
->
Dict
:
)
->
Dict
:
rules
=
rules
or
[]
rules
=
rules
or
[]
if
route_by
==
"summary"
:
routed_summary_names
=
self
.
_route_summaries
(
segment_text
=
segment_text
,
rules
=
rules
,
party_role
=
party_role
,
context_memories
=
context_memories
,
)
return
{
"segment_id"
:
segment_id
,
"route_by"
:
route_by
,
"routed_rules"
:
[],
"routed_rule_titles"
:
[],
"routed_summary_names"
:
routed_summary_names
,
}
routed_rules
=
self
.
_route_rules
(
routed_rules
=
self
.
_route_rules
(
segment_text
=
segment_text
,
segment_text
=
segment_text
,
rules
=
rules
,
rules
=
rules
,
...
@@ -96,8 +166,10 @@ class SegmentRuleRouterTool(LLMTool):
...
@@ -96,8 +166,10 @@ class SegmentRuleRouterTool(LLMTool):
)
)
return
{
return
{
"segment_id"
:
segment_id
,
"segment_id"
:
segment_id
,
"route_by"
:
route_by
,
"routed_rules"
:
routed_rules
,
"routed_rules"
:
routed_rules
,
"routed_rule_titles"
:
[
r
.
get
(
"title"
,
""
)
for
r
in
routed_rules
],
"routed_rule_titles"
:
[
r
.
get
(
"title"
,
""
)
for
r
in
routed_rules
],
"routed_summary_names"
:
[],
}
}
def
_build_candidate_rules
(
self
,
rules
:
List
[
Dict
])
->
List
[
Dict
]:
def
_build_candidate_rules
(
self
,
rules
:
List
[
Dict
])
->
List
[
Dict
]:
...
@@ -105,6 +177,17 @@ class SegmentRuleRouterTool(LLMTool):
...
@@ -105,6 +177,17 @@ class SegmentRuleRouterTool(LLMTool):
{
r
.
get
(
"title"
,
""
):
r
.
get
(
"rule"
,
""
)}
for
r
in
rules
if
r
.
get
(
"title"
)
{
r
.
get
(
"title"
,
""
):
r
.
get
(
"rule"
,
""
)}
for
r
in
rules
if
r
.
get
(
"title"
)
]
]
def
_build_candidate_summaries
(
self
,
rules
:
List
[
Dict
])
->
List
[
str
]:
summaries
:
List
[
str
]
=
[]
seen
:
set
[
str
]
=
set
()
for
rule
in
rules
:
summary
=
str
(
rule
.
get
(
"summary"
,
""
))
.
strip
()
if
not
summary
or
summary
in
seen
:
continue
summaries
.
append
(
summary
)
seen
.
add
(
summary
)
return
summaries
def
_route_rules
(
def
_route_rules
(
self
,
self
,
segment_text
:
str
,
segment_text
:
str
,
...
@@ -183,6 +266,66 @@ class SegmentRuleRouterTool(LLMTool):
...
@@ -183,6 +266,66 @@ class SegmentRuleRouterTool(LLMTool):
)
)
return
routed_rules
return
routed_rules
def
_route_summaries
(
self
,
segment_text
:
str
,
rules
:
List
[
Dict
],
party_role
:
str
,
context_memories
:
Optional
[
List
[
Dict
]],
)
->
List
[
str
]:
candidates
=
self
.
_build_candidate_summaries
(
rules
)
if
not
candidates
:
return
[]
user_content
=
(
SUMMARY_ROUTER_USER_PROMPT
.
format
(
segment_text
=
segment_text
,
context_memories_json
=
json
.
dumps
(
context_memories
or
[],
ensure_ascii
=
False
),
party_role
=
party_role
,
candidate_summaries_json
=
json
.
dumps
(
candidates
,
ensure_ascii
=
False
),
)
+
SUMMARY_ROUTER_OUTPUT_SCHEMA
)
llm_selected
:
List
[
Dict
]
=
[]
try
:
resp
=
self
.
run_with_loop
(
self
.
chat_async
(
[
{
"role"
:
"system"
,
"content"
:
SUMMARY_ROUTER_SYSTEM_PROMPT
},
{
"role"
:
"user"
,
"content"
:
user_content
},
]
)
)
data
=
self
.
parse_first_json
(
resp
)
llm_selected
=
data
.
get
(
"selected_items"
,
[])
or
[]
except
Exception
:
llm_selected
=
[]
selected_names
=
set
()
for
item
in
llm_selected
:
name
=
self
.
_selected_item_name
(
item
)
if
name
:
selected_names
.
add
(
name
)
direct_matched_names
=
{
name
for
name
in
candidates
if
name
and
name
in
(
segment_text
or
""
)
}
merged_names
=
selected_names
|
direct_matched_names
return
[
name
for
name
in
candidates
if
name
in
merged_names
]
def
_selected_item_name
(
self
,
item
:
Dict
|
str
)
->
str
:
if
isinstance
(
item
,
str
):
return
item
.
strip
()
return
str
(
item
.
get
(
"name"
)
or
item
.
get
(
"summary"
)
or
item
.
get
(
"summary_name"
)
or
item
.
get
(
"title"
)
or
""
)
.
strip
()
def
_match_trigger_titles
(
self
,
segment_text
:
str
,
rules
:
List
[
Dict
])
->
set
[
str
]:
def
_match_trigger_titles
(
self
,
segment_text
:
str
,
rules
:
List
[
Dict
])
->
set
[
str
]:
text
=
segment_text
or
""
text
=
segment_text
or
""
matched_titles
:
set
[
str
]
=
set
()
matched_titles
:
set
[
str
]
=
set
()
...
...
data/rules.xlsx
View file @
49b473ef
No preview for this file type
main.py
View file @
49b473ef
from
datetime
import
datetime
from
datetime
import
datetime
from
pathlib
import
Path
from
pathlib
import
Path
from
typing
import
Any
,
Dict
,
List
,
Optional
from
typing
import
Any
,
Dict
,
List
,
Literal
,
Optional
from
uuid
import
uuid4
from
uuid
import
uuid4
import
ast
import
ast
...
@@ -121,6 +121,7 @@ class SegmentSummaryRequest(BaseModel):
...
@@ -121,6 +121,7 @@ class SegmentSummaryRequest(BaseModel):
party_role
:
Optional
[
str
]
=
""
party_role
:
Optional
[
str
]
=
""
ruleset_id
:
Optional
[
str
]
=
"通用"
ruleset_id
:
Optional
[
str
]
=
"通用"
routed_rule_titles
:
Optional
[
List
[
str
]]
=
None
routed_rule_titles
:
Optional
[
List
[
str
]]
=
None
routed_summary_names
:
Optional
[
List
[
str
]]
=
None
file_ext
:
str
file_ext
:
str
context_facts
:
Optional
[
Dict
]
=
None
context_facts
:
Optional
[
Dict
]
=
None
...
@@ -151,6 +152,19 @@ def summarize_facts(payload: SegmentSummaryRequest) -> SegmentSummaryResponse:
...
@@ -151,6 +152,19 @@ def summarize_facts(payload: SegmentSummaryRequest) -> SegmentSummaryResponse:
)
)
ruleset_id
=
payload
.
ruleset_id
or
reference_tool
.
default_ruleset_id
ruleset_id
=
payload
.
ruleset_id
or
reference_tool
.
default_ruleset_id
if
payload
.
routed_summary_names
is
not
None
:
summary_names
=
{
name
.
strip
()
for
name
in
payload
.
routed_summary_names
if
isinstance
(
name
,
str
)
and
name
.
strip
()
}
all_rules
=
reference_tool
.
run
(
ruleset_id
=
ruleset_id
)
.
get
(
"rules"
,
[])
rules
=
[
rule
for
rule
in
all_rules
if
str
(
rule
.
get
(
"summary"
,
""
))
.
strip
()
in
summary_names
]
else
:
rules
=
reference_tool
.
run
(
rules
=
reference_tool
.
run
(
ruleset_id
=
ruleset_id
,
ruleset_id
=
ruleset_id
,
routed_rule_titles
=
payload
.
routed_rule_titles
,
routed_rule_titles
=
payload
.
routed_rule_titles
,
...
@@ -182,6 +196,9 @@ class SegmentReviewRequest(BaseModel):
...
@@ -182,6 +196,9 @@ class SegmentReviewRequest(BaseModel):
routed_rule_titles
:
Optional
[
List
[
str
]]
=
None
routed_rule_titles
:
Optional
[
List
[
str
]]
=
None
file_ext
:
str
file_ext
:
str
context_memories
:
Optional
[
List
[
Dict
]]
=
None
context_memories
:
Optional
[
List
[
Dict
]]
=
None
route_by
:
Literal
[
"rule"
,
"summary"
]
=
Field
(
default
=
"rule"
,
description
=
"路由依据:rule=审查规则项,summary=摘要项"
)
class
SegmentReviewResponse
(
BaseModel
):
class
SegmentReviewResponse
(
BaseModel
):
...
@@ -195,7 +212,9 @@ class SegmentRuleRouterResponse(BaseModel):
...
@@ -195,7 +212,9 @@ class SegmentRuleRouterResponse(BaseModel):
conversation_id
:
str
conversation_id
:
str
segment_id
:
int
segment_id
:
int
ruleset_id
:
str
ruleset_id
:
str
route_by
:
Literal
[
"rule"
,
"summary"
]
=
"rule"
routed_rule_titles
:
List
[
str
]
routed_rule_titles
:
List
[
str
]
routed_summary_names
:
List
[
str
]
=
Field
(
default_factory
=
list
)
routed_rules
:
List
[
Dict
]
routed_rules
:
List
[
Dict
]
...
@@ -311,13 +330,16 @@ def route_segment_rules(payload: SegmentReviewRequest) -> SegmentRuleRouterRespo
...
@@ -311,13 +330,16 @@ def route_segment_rules(payload: SegmentReviewRequest) -> SegmentRuleRouterRespo
rules
=
rules
,
rules
=
rules
,
party_role
=
payload
.
party_role
or
""
,
party_role
=
payload
.
party_role
or
""
,
context_memories
=
payload
.
context_memories
,
context_memories
=
payload
.
context_memories
,
route_by
=
payload
.
route_by
,
)
)
return
SegmentRuleRouterResponse
(
return
SegmentRuleRouterResponse
(
conversation_id
=
payload
.
conversation_id
,
conversation_id
=
payload
.
conversation_id
,
segment_id
=
payload
.
segment_id
,
segment_id
=
payload
.
segment_id
,
ruleset_id
=
ruleset_id
,
ruleset_id
=
ruleset_id
,
route_by
=
result
.
get
(
"route_by"
,
payload
.
route_by
),
routed_rule_titles
=
result
.
get
(
"routed_rule_titles"
,
[]),
routed_rule_titles
=
result
.
get
(
"routed_rule_titles"
,
[]),
routed_summary_names
=
result
.
get
(
"routed_summary_names"
,
[]),
routed_rules
=
result
.
get
(
"routed_rules"
,
[]),
routed_rules
=
result
.
get
(
"routed_rules"
,
[]),
)
)
...
...
utils/ocr_util.py
View file @
49b473ef
...
@@ -10,8 +10,7 @@ from utils.http_util import url_replace_fastgpt, download_file
...
@@ -10,8 +10,7 @@ from utils.http_util import url_replace_fastgpt, download_file
from
utils.common_util
import
random_str
from
utils.common_util
import
random_str
from
loguru
import
logger
from
loguru
import
logger
import
json
import
json
from
core.config
import
ocr_url
ocr_url
=
'http://192.168.252.71:8202/openapi/ocrUploadFile'
class
OCRUtil
:
class
OCRUtil
:
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment