Commit 2347d107 by ccran

feat: 技术性能77%

parent 1f595950
......@@ -16,8 +16,9 @@ class LLMConfig:
MERGE_RULE_PROMPT = False
MAX_SINGLE_CHUNK_SIZE = 5000
META_KEY = "META"
DEFAULT_RULESET_ID = "金盘简化"
ALL_RULESET_IDS = ["金盘简化"]
DEFAULT_RULESET_ID = "通用"
ALL_RULESET_IDS = ["通用", "借款", "担保", "财务口", "金盘", "金盘简化", "麓发测试"]
use_lufa = False
if use_lufa:
outer_backend_url = "http://znkf.lgfzgroup.com:48081"
......
......@@ -16,9 +16,10 @@ from utils.http_util import upload_file, fastgpt_openai_chat, download_file
SUFFIX = "_麓发迁移"
batch_input_dir_path = "jp-input"
batch_output_dir_path = f"/home/ccran/lufa-contract/data/benchmark/results/jp-output-lufa-{time.strftime('%Y%m%d-%H%M%S', time.localtime())}"
# SUFFIX = "_麓发"
# batch_input_dir_path = "lufa-input"
# batch_output_dir_path = "lufa-output"
# batch_output_dir_path = "lufa-output-standard"
batch_size = 5
# 麓发fastgpt接口
# url = "http://192.168.252.71:18089/api/v1/chat/completions"
......@@ -26,6 +27,8 @@ batch_size = 5
url = "http://192.168.252.71:18088/api/v1/chat/completions"
# 麓发合同审查生产token
# token = "fastgpt-ek3Z6PxI6sXgYc0jxzZ5bVGqrxwM6aVyfSmA6JVErJYBMr2KmYxrHwEUOIMSYz"
# 麓发合同审查生产token-标准化
# token = "fastgpt-mg5tQUgreJeF7peoOr5zqP0NR4EIrfS2bEVXge6FUL94Suu1TvEMR1sGNRSiV"
# 金盘迁移麓发合同审查测试token
token = "fastgpt-vykT6qs07g7hR4tL2MNJE6DdNCIxaQjEu3Cxw9nuTBFg8MAG3CkByvnXKxSNEyMK7"
# 人机交互测试(测试环境)
......
......@@ -121,7 +121,7 @@ def _parse_args() -> argparse.Namespace:
parser.add_argument(
"--datasets-dir",
type=Path,
default=base / "results" / "jp-output-lufa",
default=base / "results" / "jp-output-lufa-20260408-182708",
help="Directory containing Word files with annotations.",
)
parser.add_argument(
......
No preview for this file type
from spire.doc import *
import difflib
# 将文本写入文件
def WriteAllText(fname: str, text: str):
    """Write ``text`` to the file at ``fname``, replacing any existing content.

    Args:
        fname: Path of the file to create or overwrite.
        text: Text to write; it is encoded as UTF-8.
    """
    with open(fname, "w", encoding="utf-8") as fp:
        fp.write(text)
# Demo script: load a Word document, accept all tracked changes, and print a
# unified diff of the document text before and after acceptance.
inputFile = "/home/ccran/lufa-contract/demo/修订测试.docx"

# Create the Document object; everything after this point runs under
# try/finally so the document is closed even if loading or diffing fails.
doc = Document()
try:
    # Load the Word document.
    doc.LoadFromFile(inputFile)

    before_text = doc.GetText()
    print("===== 修订前文本 =====")
    print(before_text)

    # Accept all revisions, but only when the document reports pending changes.
    if doc.HasChanges:
        doc.AcceptChanges()

    # Read the text again whether or not anything was accepted, so the diff
    # below always has both sides to compare.
    after_text = doc.GetText()
    print("===== 修订后文本 =====")
    print(after_text)

    before_lines = before_text.splitlines()
    after_lines = after_text.splitlines()

    # lineterm="" because splitlines() already removed the line endings.
    diff_lines = list(
        difflib.unified_diff(
            before_lines,
            after_lines,
            fromfile="before_revision",
            tofile="after_revision",
            lineterm="",
        )
    )

    print("===== Diff 对比结果 =====")
    if diff_lines:
        print("\n".join(diff_lines))
    else:
        print("修订前后文本一致,无差异。")
finally:
    doc.Close()
from spire.doc import *
# 将文本写入文件
def WriteAllText(fname: str, text: str):
    """Write ``text`` to the file at ``fname``, replacing any existing content.

    Args:
        fname: Path of the file to create or overwrite.
        text: Text to write; it is encoded as UTF-8.
    """
    with open(fname, "w", encoding="utf-8") as fp:
        fp.write(text)
# Demo script: walk a Word document and collect its tracked insertions and
# deletions (at paragraph level and at text-range level) into two text files.
inputFile = "/home/ccran/lufa-contract/demo/修订测试.docx"
outputFile1 = "/home/ccran/lufa-contract/demo/新增修订.txt"
outputFile2 = "/home/ccran/lufa-contract/demo/删除修订.txt"


def _describe_revision(revision, label: str, text: str, terminator: str) -> list:
    """Return the three report lines for one tracked revision.

    Args:
        revision: Revision object exposing ``Type`` (with a ``name``) and
            ``Author``.
        label: ``"Insertion"`` or ``"Deletion"``; prefixes the text line.
        text: Text of the revised paragraph or text range.
        terminator: Line ending for the text line; paragraph-level entries
            use ``"\n\n"`` and text-range entries use ``"\n"``.

    Returns:
        A list of three newline-terminated strings: type, author, text.
    """
    return [
        f"Revision Type: {revision.Type.name}\n",
        f"Revision Author: {revision.Author}\n",
        f"{label} Text: {text}{terminator}",
    ]


# Create the Document object; the work below runs under try/finally so the
# document is disposed even if loading or traversal raises.
document = Document()
try:
    # Load the Word document.
    document.LoadFromFile(inputFile)
    print(document.GetText())

    # Accumulate report fragments; they are joined once when written out.
    insert_revisions = []
    delete_revisions = []

    # Iterate over every section of the document.
    for k in range(document.Sections.Count):
        sec = document.Sections.get_Item(k)
        # Iterate over the body elements of the section.
        for m in range(sec.Body.ChildObjects.Count):
            docItem = sec.Body.ChildObjects.get_Item(m)
            # Only paragraphs are inspected; other body items are skipped.
            if not isinstance(docItem, Paragraph):
                continue
            para = docItem

            if para.IsInsertRevision:
                # The whole paragraph is an insertion revision.
                insert_revisions.extend(
                    _describe_revision(
                        para.InsertRevision, "Insertion", para.Text, "\n\n"
                    )
                )
            elif para.IsDeleteRevision:
                # The whole paragraph is a deletion revision.
                delete_revisions.extend(
                    _describe_revision(
                        para.DeleteRevision, "Deletion", para.Text, "\n\n"
                    )
                )
            else:
                # The paragraph itself is not a revision, so look for
                # revisions on the individual text ranges inside it.
                for j in range(para.ChildObjects.Count):
                    obj = para.ChildObjects.get_Item(j)
                    if not isinstance(obj, TextRange):
                        continue
                    textRange = obj

                    if textRange.IsInsertRevision:
                        insert_revisions.extend(
                            _describe_revision(
                                textRange.InsertRevision,
                                "Insertion",
                                textRange.Text,
                                "\n",
                            )
                        )
                    elif textRange.IsDeleteRevision:
                        delete_revisions.extend(
                            _describe_revision(
                                textRange.DeleteRevision,
                                "Deletion",
                                textRange.Text,
                                "\n",
                            )
                        )

    # Write the collected revision reports to their output files.
    WriteAllText(outputFile1, "".join(insert_revisions))
    WriteAllText(outputFile2, "".join(delete_revisions))
finally:
    # Release the resources held by the document.
    document.Dispose()
......@@ -394,7 +394,9 @@ def merge_segment_findings(payload: MergerRequest) -> MergerResponse:
unqualified_findings = [
f for f in segment_findings if (f.result or "").strip() == "不合格"
]
merged_result = merger_tool.run([f.__dict__ for f in unqualified_findings])
merged_result = merger_tool.run(
[f.__dict__ for f in unqualified_findings], merge_mode="rule"
)
merged_findings = merged_result.get("findings", []) or []
for f in merged_findings:
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment