Commit d26c53e1 by ccran

feat: add readme.md

parent 5f18aa67
......@@ -7,5 +7,7 @@
# Keep Python source files
!**/*.py
!README.md
# Keep this file tracked
!.gitignore
\ No newline at end of file
# 合同审查智能体 (Contract Review Agent)
一个基于 FastAPI 和大型语言模型 (LLM) 的智能合同审查系统,能够自动分析合同条款、识别风险并提供审查建议。
## 📋 项目概述
本项目是一个智能合同审查代理,通过以下流程实现合同自动化审查:
1. **文档解析** - 支持多种格式的合同文档解析
2. **分段处理** - 将合同按规则智能分段
3. **事实提取** - 从每个分段中提取与审查规则相关的客观事实
4. **规则审查** - 基于预设规则对提取的事实进行审查
5. **风险复核** - 对审查结果进行反思和复核
6. **结果合并** - 合并所有分段审查结果生成最终报告
## 🏗️ 项目结构
```
lufa-contract/
├── main.py # FastAPI 主应用入口
├── test.py # 测试脚本
├── core/ # 核心业务逻辑
│ ├── cache.py # 缓存管理
│ ├── config.py # 配置管理
│ ├── memory.py # 记忆/状态管理
│ ├── tool.py # 工具基类
│ └── tools/ # 具体工具实现
│ ├── segment_summary.py # 分段事实提取
│ ├── segment_review.py # 分段规则审查
│ ├── segment_rule_router.py # 规则路由
│ ├── retrieve_reference.py # 参考检索
│ ├── reflect_retry.py # 反思重试
│ └── segment_merger.py # 结果合并
├── data/ # 数据文件
│ ├── rules.xlsx # 审查规则表
│ ├── batch/ # 批量处理数据
│ └── benchmark/ # 基准测试数据
├── utils/ # 工具函数
│ ├── common_util.py # 通用工具
│ ├── http_util.py # HTTP 工具
│ └── doc_util.py # 文档工具
├── demo/ # 演示文件
├── tmp/ # 临时文件
└── .vscode/ # VSCode 配置
```
## 🔧 技术栈
- **后端框架**: FastAPI
- **LLM 服务**: Qwen2-72B-Instruct (可配置)
- **文档处理**: 支持 PDF、Word 等多种格式
- **日志**: Loguru
- **数据验证**: Pydantic
## 📦 核心功能
### 1. 分段事实提取 (SegmentSummary)
基于审查规则从合同分段中提取客观事实,确保:
- 事实可在原文中直接找到
- 不做抽象、概括或推断
- 不补充未出现的主体、条件或数值
### 2. 分段规则审查 (SegmentReview)
对提取的事实进行规则匹配和风险分析,输出:
- 风险等级 (H/M/L)
- 审查结论
- 修改建议
### 3. 反思重试 (ReflectRetry)
对审查结果进行自我反思,识别潜在问题并重试
### 4. 结果合并 (SegmentMerger)
合并所有分段的审查结果,生成完整的审查报告
## ⚙️ 配置说明
在 `core/config.py` 中可配置:
```python
# LLM 配置
LLMConfig:
base_url: "http://192.168.252.71:9002/v1"
model: "Qwen2-72B-Instruct"
# 审查规则集
ALL_RULESET_IDS = ["通用", "借款", "担保", "财务口", "金盘", "金盘简化"]
# 分段大小控制
MAX_SINGLE_CHUNK_SIZE = 5000
```
## 🚀 快速开始
### 1. 安装依赖
```bash
pip install fastapi uvicorn pydantic loguru openpyxl requests
```
### 2. 启动服务
```bash
python main.py
```
服务将在 `http://localhost:8000` 启动
### 3. API 端点
- `POST /sleep` - 测试端点
- `POST /document/parse` - 解析合同文档
- `POST /contract/review` - 执行合同审查
- `GET /contract/{conversation_id}/result` - 获取审查结果
## 📝 使用示例
### 提交合同审查请求
```python
import requests
# 提交合同文档解析请求(通过 file_url 指定文档地址)
response = requests.post(
"http://localhost:8000/document/parse",
json={
"conversation_id": "unique-conversation-id",
"file_url": "http://example.com/contract.pdf",
"ruleset_id": "通用"
}
)
# 获取审查结果
result = requests.get(
f"http://localhost:8000/contract/{response.json()['conversation_id']}/result"
)
```
## 🔐 安全说明
- API Key 配置在 `core/config.py`
- 支持内外网环境切换 (`use_lufa` 参数)
- 临时文件自动清理
## 📊 数据格式
### 审查结果结构
```json
{
"conversation_id": "xxx",
"findings": [
{
"segment_id": "seg_001",
"rule_id": "rule_001",
"risk_level": "H",
"fact": "提取的事实",
"conclusion": "审查结论",
"suggestion": "修改建议"
}
]
}
```
## 🛠️ 开发指南
### 添加新的审查规则
1. 在 `data/rules.xlsx` 中添加新规则
2. 更新 `core/config.py` 中的规则集配置
3. 重启服务
### 自定义 LLM 模型
修改 `core/config.py` 中的 `LLMConfig`:
```python
LLMConfig:
base_url: "你的 LLM 服务地址"
model: "你的模型名称"
```
## 📄 许可证
内部使用,保留所有权利。
## 👥 维护者
- 开发团队
## 📞 联系方式
如有问题,请联系项目维护团队。
......@@ -17,7 +17,7 @@ from core.config import META_KEY
logger = logging.getLogger(__name__)
_ALLOWED_RISK_LEVELS = {"H", "M", "L",""}
_ALLOWED_RISK_LEVELS = {"H", "M", "L", ""}
FINDING_KEY_REVIEW = "review"
FINDING_KEY_REFLECT = "reflect"
FINDING_KEY_MERGE = "merge"
......@@ -44,7 +44,9 @@ class Finding:
def __post_init__(self) -> None:
    """Normalize ``risk_level`` to upper case and validate it.

    A falsy ``risk_level`` (e.g. ``None``) is treated as the empty string
    before the membership check against ``_ALLOWED_RISK_LEVELS``.

    Raises:
        ValueError: if the upper-cased level is not in
            ``_ALLOWED_RISK_LEVELS``. The message reports the value as it
            was originally supplied, not the normalized one.
    """
    raw_level = self.risk_level or ""
    normalized = raw_level.upper()
    if normalized in _ALLOWED_RISK_LEVELS:
        # Store the normalized form so later comparisons can be exact.
        self.risk_level = normalized
        return
    raise ValueError(
        f"risk_level must be one of {_ALLOWED_RISK_LEVELS}, got {self.risk_level}"
    )
@classmethod
......@@ -72,10 +74,9 @@ class Finding:
class MemoryStore:
"""简化的记忆存储:合同事实 facts 与问题 findings。线程安全并支持 JSON 持久化。"""
storage_name: Optional[Path] = 'default.json'
storage_name: Optional[Path] = "default.json"
def __init__(self,storage_name:str = 'default.json') -> None:
def __init__(self, storage_name: str = "default.json") -> None:
self._storage_path = Path(__file__).resolve().parent.parent / "tmp" / storage_name # type: ignore[arg-type]
self._storage_path.parent.mkdir(parents=True, exist_ok=True)
self._lock = RLock()
......@@ -119,12 +120,14 @@ class MemoryStore:
for top_key, top_value in item.items():
if _key_match(top_key):
matched_values.append({
matched_values.append(
{
top_key: top_value,
META_KEY: item.get(META_KEY, {}) # include metadata if exists
})
META_KEY: item.get(
META_KEY, {}
), # include metadata if exists
}
)
return matched_values
......@@ -141,8 +144,16 @@ class MemoryStore:
def delete_findings_by_segment(self, key: str, segment_id: int) -> int:
    """Public entry point that forwards to ``_delete_findings_by_segment``.

    Args:
        key: Findings bucket key, passed through unchanged.
        segment_id: Segment identifier, passed through unchanged.

    Returns:
        Whatever the private helper returns (annotated as ``int``;
        presumably the number of removed findings — confirm against
        ``_delete_findings_by_segment``).
    """
    removed = self._delete_findings_by_segment(key, segment_id)
    return removed
def search_findings(
    self,
    key: str,
    keyword: str,
    rule_title: Optional[str] = None,
    risk_level: Optional[str] = None,
) -> List[Finding]:
    """Search one findings bucket.

    Resolves the bucket for ``key`` through ``_get_findings_bucket`` and
    hands it, together with the remaining arguments, to
    ``_search_findings``; the matching rules themselves live in that helper.

    Args:
        key: Findings bucket key.
        keyword: Search keyword forwarded to ``_search_findings``.
        rule_title: Optional rule-title filter forwarded unchanged.
        risk_level: Optional risk-level filter forwarded unchanged.

    Returns:
        The list produced by ``_search_findings``.
    """
    bucket = self._get_findings_bucket(key)
    return self._search_findings(bucket, keyword, rule_title, risk_level)
def list_findings_grouped(self) -> Dict[str, List[Finding]]:
with self._lock:
......@@ -166,7 +177,9 @@ class MemoryStore:
with self._lock:
return list(target)
def _get_findings_by_segment(
    self, target: List[Finding], segment_id: int
) -> List[Finding]:
    """Return the findings in ``target`` whose ``segment_id`` matches.

    The scan runs while holding ``self._lock``; the result is a new list in
    the same order as ``target``.
    """
    selected: List[Finding] = []
    with self._lock:
        for finding in target:
            if finding.segment_id == segment_id:
                selected.append(finding)
    return selected
......@@ -192,21 +205,29 @@ class MemoryStore:
with self._lock:
candidates = list(target)
if rule_title:
candidates = [f for f in candidates if (f.rule_title or "").lower() == rule_title.strip().lower()]
candidates = [
f
for f in candidates
if (f.rule_title or "").lower() == rule_title.strip().lower()
]
if risk_level:
lvl = risk_level.strip().upper()
candidates = [f for f in candidates if f.risk_level == lvl]
if not key:
return candidates
def _matches(f: Finding) -> bool:
hay = " ".join([
hay = " ".join(
[
f.rule_title,
f.original_text,
f.issue,
f.suggestion,
f.result,
]).lower()
]
).lower()
return key in hay
return [f for f in candidates if _matches(f)]
# ------------------- housekeeping ------------------
......@@ -225,7 +246,9 @@ class MemoryStore:
},
}
try:
self._storage_path.write_text(json.dumps(payload, ensure_ascii=False, indent=2), encoding="utf-8")
self._storage_path.write_text(
json.dumps(payload, ensure_ascii=False, indent=2), encoding="utf-8"
)
except Exception as exc:
logger.error("Failed to persist memory store: %s", exc)
......@@ -243,7 +266,9 @@ class MemoryStore:
if isinstance(loaded_findings, dict):
for key, items in loaded_findings.items():
normalized_key = self._normalize_finding_key(str(key))
findings_map[normalized_key] = [Finding.from_dict(item) for item in (items or [])]
findings_map[normalized_key] = [
Finding.from_dict(item) for item in (items or [])
]
self.findings = findings_map
needs_persist = False
......@@ -262,7 +287,9 @@ class MemoryStore:
try:
from openpyxl import Workbook # type: ignore
except ImportError as exc:
raise ImportError("openpyxl is required for export_to_excel; install via 'pip install openpyxl'") from exc
raise ImportError(
"openpyxl is required for export_to_excel; install via 'pip install openpyxl'"
) from exc
ts = datetime.now().strftime("%Y%m%d_%H%M%S")
name = file_name or f"memory_export_{ts}.xlsx"
......@@ -285,21 +312,34 @@ class MemoryStore:
if grouped_items:
first_key, first_values = grouped_items[0]
ws_first = wb.active
first_sheet_name = _FINDING_KEY_SHEET_NAMES.get(self._normalize_finding_key(first_key), first_key)
first_sheet_name = _FINDING_KEY_SHEET_NAMES.get(
self._normalize_finding_key(first_key), first_key
)
ws_first.title = self._safe_sheet_name(first_sheet_name)
ws_first.append([label for _, label in finding_headers])
for f in first_values:
ws_first.append([getattr(f, key, "") for key, _ in finding_headers])
for key, values in grouped_items[1:]:
sheet_name = _FINDING_KEY_SHEET_NAMES.get(self._normalize_finding_key(key), key)
sheet_name = _FINDING_KEY_SHEET_NAMES.get(
self._normalize_finding_key(key), key
)
ws = wb.create_sheet(self._safe_sheet_name(sheet_name))
ws.append([label for _, label in finding_headers])
for f in values:
ws.append([getattr(f, item_key, "") for item_key, _ in finding_headers])
ws.append(
[
getattr(f, item_key, "")
for item_key, _ in finding_headers
]
)
else:
ws_empty = wb.active
ws_empty.title = self._safe_sheet_name(_FINDING_KEY_SHEET_NAMES.get(_DEFAULT_REVIEW_KEY, _DEFAULT_REVIEW_KEY))
ws_empty.title = self._safe_sheet_name(
_FINDING_KEY_SHEET_NAMES.get(
_DEFAULT_REVIEW_KEY, _DEFAULT_REVIEW_KEY
)
)
ws_empty.append([label for _, label in finding_headers])
ws_facts = wb.create_sheet("合同事实")
......@@ -310,7 +350,12 @@ class MemoryStore:
ws_facts.append(["事实", json.dumps(item, ensure_ascii=False)])
continue
meta_info = item.get(META_KEY, None)
ws_facts.append([json.dumps(meta_info, ensure_ascii=False), json.dumps(item, ensure_ascii=False)])
ws_facts.append(
[
json.dumps(meta_info, ensure_ascii=False),
json.dumps(item, ensure_ascii=False),
]
)
else:
ws_facts.append(["元信息", "事实内容"])
......@@ -435,16 +480,17 @@ def test_export_findings_to_doc_comments(doc_path: str) -> None:
print("Export doc comments:")
print(json.dumps(res, ensure_ascii=False, indent=2))
def test_memory_and_export_excel():
# 简单示例:设置事实 -> 写入问题 -> 读取/搜索
store = MemoryStore()
store.add_facts({
store.add_facts(
{
"公司": {"甲方": "A 公司", "乙方": "B 公司"},
"支付": {"方式": "银行转账", "期限": "验收后30日内"},
META_KEY:{
"segment_id":1
META_KEY: {"segment_id": 1},
}
})
)
# print( store.search_facts(['支付']))
finding1 = Finding(
rule_title="违约责任",
......@@ -477,4 +523,3 @@ def test_memory_and_export_excel():
if __name__ == "__main__":
# test_export_findings_to_doc_comments("/home/ccran/lufa-contract/tmp/股份转让协议.docx")
test_memory_and_export_excel()
......@@ -9,7 +9,6 @@ from core.tool import ToolBase, tool, tool_func
from utils.excel_util import ExcelUtil
@tool("retrieve_reference", "审查参考检索")
class RetrieveReferenceTool(ToolBase):
def __init__(self) -> None:
......@@ -22,12 +21,16 @@ class RetrieveReferenceTool(ToolBase):
"triggers": "触发词",
"suggestion_template": "建议模板",
"case": "案例",
"summary":"摘要项"
"summary": "摘要项",
}
rules_path = Path(__file__).resolve().parent.parent.parent / "data" / "rules.xlsx"
rules_path = (
Path(__file__).resolve().parent.parent.parent / "data" / "rules.xlsx"
)
self.rulesets: Dict[str, List[Dict[str, Any]]] = {}
for rs_id in ALL_RULESET_IDS:
rules = ExcelUtil.load_mapped_excel(rules_path, sheet_name=rs_id, column_map=self.column_map)
rules = ExcelUtil.load_mapped_excel(
rules_path, sheet_name=rs_id, column_map=self.column_map
)
self.rulesets[rs_id] = rules
@tool_func(
......@@ -40,13 +43,21 @@ class RetrieveReferenceTool(ToolBase):
"required": [],
}
)
def run(self, ruleset_id: str = "", routed_rule_titles: List[str] | None = None) -> Dict[str, Any]:
def run(
self, ruleset_id: str = "", routed_rule_titles: List[str] | None = None
) -> Dict[str, Any]:
target_ruleset_id = ruleset_id or self.default_ruleset_id
full_rules = self.rulesets.get(target_ruleset_id) or self.rulesets.get(self.default_ruleset_id, []) or []
full_rules = (
self.rulesets.get(target_ruleset_id)
or self.rulesets.get(self.default_ruleset_id, [])
or []
)
if routed_rule_titles is None:
rules = full_rules
else:
title_set = {title for title in routed_rule_titles if isinstance(title, str)}
title_set = {
title for title in routed_rule_titles if isinstance(title, str)
}
rules = [r for r in full_rules if r.get("title") in title_set]
return {
......@@ -59,6 +70,7 @@ class RetrieveReferenceTool(ToolBase):
def summary_keywords(self, rules: List[Dict[str, Any]]) -> List[str]:
    """Collect the truthy ``"summary"`` values of ``rules`` in input order.

    Rules without a ``"summary"`` key, or with an empty/falsy value, are
    skipped.
    """
    keywords: List[str] = []
    for rule in rules:
        summary = rule.get("summary")
        if summary:
            keywords.append(summary)
    return keywords
if __name__ == "__main__":
tool = RetrieveReferenceTool()
result = tool.run(ruleset_id="金盘", routed_rule_titles=None)
......
......@@ -3,7 +3,7 @@ import os
import re
import sys
sys.path.append('../..')
sys.path.append("../..")
import traceback
import concurrent.futures
......@@ -12,21 +12,21 @@ from loguru import logger
from utils.common_util import random_str
from utils.http_util import upload_file, fastgpt_openai_chat, download_file
# SUFFIX='_麓发迁移'
# batch_input_dir_path = 'jp-input'
# batch_output_dir_path = 'jp-output-lufa-new'
SUFFIX='_麓发'
batch_input_dir_path = 'lufa-input'
batch_output_dir_path = 'lufa-output'
SUFFIX = "_麓发迁移"
batch_input_dir_path = "jp-input"
batch_output_dir_path = "jp-output-lufa-new"
# SUFFIX = "_麓发"
# batch_input_dir_path = "lufa-input"
# batch_output_dir_path = "lufa-output"
batch_size = 5
# 麓发fastgpt接口
url = 'http://192.168.252.71:18089/api/v1/chat/completions'
# url = "http://192.168.252.71:18089/api/v1/chat/completions"
# 金盘fastgpt接口
# url = 'http://192.168.252.71:18088/api/v1/chat/completions'
url = "http://192.168.252.71:18088/api/v1/chat/completions"
# 麓发合同审查生产token
token = 'fastgpt-ek3Z6PxI6sXgYc0jxzZ5bVGqrxwM6aVyfSmA6JVErJYBMr2KmYxrHwEUOIMSYz'
# token = "fastgpt-ek3Z6PxI6sXgYc0jxzZ5bVGqrxwM6aVyfSmA6JVErJYBMr2KmYxrHwEUOIMSYz"
# 金盘迁移麓发合同审查测试token
# token = 'fastgpt-vykT6qs07g7hR4tL2MNJE6DdNCIxaQjEu3Cxw9nuTBFg8MAG3CkByvnXKxSNEyMK7'
token = "fastgpt-vykT6qs07g7hR4tL2MNJE6DdNCIxaQjEu3Cxw9nuTBFg8MAG3CkByvnXKxSNEyMK7"
# 人机交互测试(测试环境)
# token = 'fastgpt-p189K5zoTX5wjp0dBybFCwsbWm3juIwlJxt2wTGyiaOWOANI5Y10pKEZzyt'
# 人机交互测试(生产环境)
......@@ -34,9 +34,13 @@ token = 'fastgpt-ek3Z6PxI6sXgYc0jxzZ5bVGqrxwM6aVyfSmA6JVErJYBMr2KmYxrHwEUOIMSYz'
# 提取后审查测试
# token = 'fastgpt-n74gGX5ZqLT6o1ysMBSGUTjIciswYOWDRfQ75krMkE5gDVDkpzsbz8u'
def extract_url(text):
# \s * ([ ^ "\s]+?\.(?:docx?|pdf|xlsx))
excel_p, doc_p = r'最终审查Excel\s*([^"]*xlsx)', r'最终审查批注\s*([^\" ]+?\.(?:docx?|pdf|wps))'
excel_p, doc_p = (
r'最终审查Excel\s*([^"]*xlsx)',
r"最终审查批注\s*([^\" ]+?\.(?:docx?|pdf|wps))",
)
# 使用 re.search() 查找第一个匹配项
excel_m, doc_m = re.search(excel_p, text), re.search(doc_p, text)
if excel_m and doc_m:
......@@ -46,7 +50,9 @@ def extract_url(text):
return None, None
def process_single_file(file, batch_input_dir_path, batch_output_dir_path, counter, start_file):
def process_single_file(
file, batch_input_dir_path, batch_output_dir_path, counter, start_file
):
"""
单文件处理逻辑,可被线程池并发调用
"""
......@@ -55,29 +61,45 @@ def process_single_file(file, batch_input_dir_path, batch_output_dir_path, count
return
# 提取文件前缀
file_name = file[:file.rfind('.')]
ext_name = file[file.rfind('.'):]
file_name = file[: file.rfind(".")]
ext_name = file[file.rfind(".") :]
# 源目标处理
original_file = f'{batch_input_dir_path}/{file}'
des_check_file = f'{batch_output_dir_path}/{file_name}.md'
des_excel_file = f'{batch_output_dir_path}/{file_name}{SUFFIX}.xlsx'
des_doc_file = f'{batch_output_dir_path}/{file_name}{SUFFIX}{ext_name}'
original_file = f"{batch_input_dir_path}/{file}"
des_check_file = f"{batch_output_dir_path}/{file_name}.md"
des_excel_file = f"{batch_output_dir_path}/{file_name}{SUFFIX}.xlsx"
des_doc_file = f"{batch_output_dir_path}/{file_name}{SUFFIX}{ext_name}"
try:
# 处理原文件
file_url = upload_file(original_file, input_url_to_inner=True).replace('218.77.58.8', '192.168.252.71')
model = 'Qwen2-72B-Instruct'
file_url = upload_file(original_file, input_url_to_inner=True).replace(
"218.77.58.8", "192.168.252.71"
)
model = "Qwen2-72B-Instruct"
# 合同审核Excel工作流处理
logger.info(' 第{}个文件,处理文件: {}'.format(counter, original_file))
logger.info(" 第{}个文件,处理文件: {}".format(counter, original_file))
result = fastgpt_openai_chat(url, token, model, random_str(), file_url, f'测试批处理任务-{file_name}', False)
result = fastgpt_openai_chat(
url,
token,
model,
random_str(),
file_url,
f"测试批处理任务-{file_name}",
False,
)
excel_url, doc_url = extract_url(result)
if excel_url and doc_url:
download_file(excel_url.replace('218.77.58.8', '192.168.252.71'), des_excel_file)
download_file(doc_url.replace('218.77.58.8', '192.168.252.71'), des_doc_file)
logger.info(f'第{counter}个文件下载:{excel_url}到{des_excel_file} {des_doc_file}')
download_file(
excel_url.replace("218.77.58.8", "192.168.252.71"), des_excel_file
)
download_file(
doc_url.replace("218.77.58.8", "192.168.252.71"), des_doc_file
)
logger.info(
f"第{counter}个文件下载:{excel_url}到{des_excel_file} {des_doc_file}"
)
except Exception as e:
logger.error(f'{original_file} 处理异常 第{counter}个文件: {e}')
logger.error(f"{original_file} 处理异常 第{counter}个文件: {e}")
logger.error(traceback.print_exc())
......@@ -103,5 +125,5 @@ def execute_batch(max_workers: int = 4):
f.result()
if __name__ == '__main__':
if __name__ == "__main__":
execute_batch(batch_size)
......@@ -121,7 +121,7 @@ def _parse_args() -> argparse.Namespace:
parser.add_argument(
"--datasets-dir",
type=Path,
default=base / "results" / "jp-output-renji",
default=base / "results" / "jp-output-lufa",
help="Directory containing Word files with annotations.",
)
parser.add_argument(
......@@ -133,7 +133,7 @@ def _parse_args() -> argparse.Namespace:
parser.add_argument(
"--val-dir",
type=Path,
default=base / "results" / "jp-output-renji-extracted",
default=base / "results" / "jp-output-lufa-extracted",
help="Directory to store extracted xlsx files for comparison.",
)
parser.add_argument(
......
No preview for this file type
......@@ -9,7 +9,9 @@ class DocBase(ABC):
self._doc_path = None
self._doc_name = None
self._kwargs = kwargs
self._max_single_chunk_size = kwargs.get('max_single_chunk_size', MAX_SINGLE_CHUNK_SIZE)
self._max_single_chunk_size = kwargs.get(
"max_single_chunk_size", MAX_SINGLE_CHUNK_SIZE
)
@abstractmethod
def load(self, doc_path):
......
......@@ -509,10 +509,10 @@ class SpireWordDoc(DocBase):
cell_list.append(cell_content)
# table_data += "|" + "|".join(cell_list) + "|"
# table_data += "\n"
table_data += ' '.join(cell_list) + '\n'
table_data += " ".join(cell_list) + "\n"
if i == 0:
# table_data += "|" + "|".join(["--- " for _ in cell_list]) + "|\n"
table_data= ' '.join(cell_list) + '\n'
table_data = " ".join(cell_list) + "\n"
return table_data
def get_chunk_info(self, chunk_id):
......@@ -608,14 +608,18 @@ class SpireWordDoc(DocBase):
return True
def _update_comment_content(self, comment_idx, suggest):
self._doc.Comments.get_Item(comment_idx).Body.Paragraphs.get_Item(0).Text = suggest
self._doc.Comments.get_Item(comment_idx).Body.Paragraphs.get_Item(
0
).Text = suggest
def _try_add_comment_in_paragraphs(self, paragraphs, target_text, author, suggest):
if not target_text:
return False
for paragraph in paragraphs:
text_sel = paragraph.Find(target_text, False, True)
if text_sel and self.set_comment_by_text_selection(text_sel, author, suggest):
if text_sel and self.set_comment_by_text_selection(
text_sel, author, suggest
):
return True
return False
......@@ -767,8 +771,11 @@ class SpireWordDoc(DocBase):
# update chunk_id
comment_chunk_id = comment.get("chunk_id", -1)
# 优先使用comments里提供的chunk_id,如果没有或无效则使用外部传入的chunk_id,如果都没有则异常处理
sub_chunks = self.get_sub_chunks(comment_chunk_id) if comment_chunk_id != -1 \
and comment_chunk_id < self.get_chunk_num() else self.get_sub_chunks(chunk_id)
sub_chunks = (
self.get_sub_chunks(comment_chunk_id)
if comment_chunk_id != -1 and comment_chunk_id < self.get_chunk_num()
else self.get_sub_chunks(chunk_id)
)
author = self.format_comment_author(comment)
suggest = comment.get("suggest", "")
find_key = comment["original_text"].strip() or comment["key_points"]
......@@ -808,7 +815,9 @@ class SpireWordDoc(DocBase):
normalized_author = self._normalize_author_prefix(author)
for i in range(self._doc.Comments.Count):
current_comment = self._doc.Comments.get_Item(i)
comment_author = self._normalize_author_prefix(current_comment.Format.Author)
comment_author = self._normalize_author_prefix(
current_comment.Format.Author
)
if comment_author == normalized_author:
return i
return None
......@@ -876,9 +885,7 @@ class SpireWordDoc(DocBase):
if __name__ == "__main__":
doc = SpireWordDoc()
doc.load(
r"/home/ccran/lufa-contract/demo/今麦郎合同审核.docx"
)
doc.load(r"/home/ccran/lufa-contract/demo/今麦郎合同审核.docx")
print(doc._doc_name)
print("附件2《技术协议》" in doc.get_all_text())
# doc.add_chunk_comment(
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment