Commit 37f636e2 by ccran

feat: add skills;

parent 461d5ea7
......@@ -14,7 +14,7 @@ use_docker = False
@dataclass
class LLMConfig:
base_url: str = "http://192.168.252.71:9002/v1"
base_url: str = "http://172.21.107.80:9002/v1"
api_key: str = "none"
model: str = "Qwen2-72B-Instruct"
......@@ -23,8 +23,8 @@ min_single_chunk_size = 2000
max_single_chunk_size = 100000
max_chunk_page = 10
MAX_SINGLE_CHUNK_SIZE = 100000
# MAX_SINGLE_CHUNK_SIZE = 5000
# MAX_SINGLE_CHUNK_SIZE = 100000
MAX_SINGLE_CHUNK_SIZE = 5000
# MAX_SINGLE_CHUNK_SIZE = 2000
MERGE_RULE_PROMPT = False
META_KEY = "META"
......
......@@ -19,7 +19,7 @@ batch_size = 5
if not use_lufa:
SUFFIX = "_麓发迁移"
batch_input_dir_path = "jp-input"
batch_output_dir_path = f"/data/home/htsc/jp-contract/data/benchmark/results/jp-output-lufa-chunk100000"
batch_output_dir_path = f"/data/home/htsc/jp-contract/data/benchmark/results/jp-output-simple"
# 金盘fastgpt接口
url = "http://172.21.107.45:3002/api/v1/chat/completions"
# 金盘迁移麓发合同审查测试token
......
No preview for this file type
#!/usr/bin/env python3
"""Standalone file and FastGPT chat utility CLI."""
from __future__ import annotations
import argparse, json, mimetypes, random, re, string, sys, time, urllib.error, urllib.parse, urllib.request
from pathlib import Path
from typing import Any
FASTGPT = "http://172.21.107.45:3030"
BACKEND = "http://172.21.107.45:1122"
OUTER = "https://172.21.107.45:48080"
def rand(n: int = 8) -> str:
    """Return a string of ``n`` random lowercase ASCII letters.

    Letters are drawn independently, so the same letter may repeat.
    """
    letters = []
    for _ in range(n):
        letters.append(random.choice(string.ascii_lowercase))
    return "".join(letters)
def url_replace_fastgpt(origin: str, base_fastgpt_url: str = FASTGPT) -> str:
    """Turn a FastGPT-relative path into a full URL.

    Values that already start with ``http:`` or ``https:`` are returned
    untouched; anything else is appended to ``base_fastgpt_url``.
    """
    if origin.startswith(("http:", "https:")):
        return origin
    return base_fastgpt_url + origin
def basename(name: str) -> str:
    """Extract a bare file name from a header value or URL path.

    Surrounding whitespace and double quotes are removed, percent-escapes are
    decoded and backslashes are treated as path separators.  Falls back to
    ``"downloaded_file"`` when no final path component remains.
    """
    cleaned = name.strip().strip('"')
    decoded = urllib.parse.unquote(cleaned)
    leaf = Path(decoded.replace("\\", "/")).name
    if leaf:
        return leaf
    return "downloaded_file"
def resolve_name(url: str, headers: dict[str, str]) -> str:
    """Choose a file name for a downloaded resource.

    The ``Content-Disposition`` header is consulted first (the ``filename*=``
    form before the plain ``filename=`` form); when it yields nothing the last
    path component of ``url`` is used.

    Args:
        url: URL the resource was fetched from.
        headers: Response headers as a plain mapping.

    Returns:
        The file name, as normalised by ``basename``.
    """
    # HTTP header names are case-insensitive, but a plain dict is not, so the
    # name is matched without regard to case instead of probing fixed spellings.
    disposition = ""
    for key, value in headers.items():
        if key.lower() == "content-disposition" and value:
            disposition = value
            break
    patterns = (
        r"filename\*=(?:UTF-8''|utf-8'')?([^;]+)",
        r'filename="?([^";]+)"?',
    )
    for pattern in patterns:
        match = re.search(pattern, disposition)
        if match:
            return basename(match.group(1))
    return basename(urllib.parse.urlparse(url).path)
def download_file(url: str, path: str, input_url_to_inner: bool = True, base_fastgpt_url: str = FASTGPT, base_backend_url: str = BACKEND, outer_backend_url: str = OUTER) -> str | None:
    """Download ``url`` and store it at ``path``.

    When ``input_url_to_inner`` is true, a URL without an ``http:``/``https:``
    prefix is appended to ``base_fastgpt_url`` and every occurrence of
    ``outer_backend_url`` is replaced with ``base_backend_url``.  If ``path``
    is an existing directory the file name is taken from ``resolve_name``.

    Returns:
        The saved path as a string, or ``None`` after an HTTP error status
        (a message is written to stderr in that case).
    """
    if input_url_to_inner:
        if not url.startswith(("http:", "https:")):
            url = base_fastgpt_url + url
        url = url.replace(outer_backend_url, base_backend_url)
    request = urllib.request.Request(url, method="GET")
    try:
        with urllib.request.urlopen(request, timeout=120) as resp:
            destination = Path(path)
            if destination.exists() and destination.is_dir():
                destination = destination / resolve_name(url, dict(resp.headers))
            destination.parent.mkdir(parents=True, exist_ok=True)
            destination.write_bytes(resp.read())
            return str(destination)
    except urllib.error.HTTPError as exc:
        print(f"{url}文件下载失败. HTTP Status Code: {exc.code}", file=sys.stderr)
        return None
def post_json(url: str, data: dict[str, Any], headers: dict[str, str] | None = None, timeout: int = 120) -> str:
    """POST ``data`` as a JSON body and return the response text.

    ``headers`` are merged over the default ``Content-Type: application/json``.
    The response is decoded as UTF-8 with undecodable bytes replaced.
    """
    merged_headers = {"Content-Type": "application/json"}
    merged_headers.update(headers or {})
    payload = json.dumps(data, ensure_ascii=False).encode()
    request = urllib.request.Request(url, data=payload, headers=merged_headers, method="POST")
    with urllib.request.urlopen(request, timeout=timeout) as resp:
        raw = resp.read()
    return raw.decode("utf-8", errors="replace")
def multipart(path: str) -> tuple[bytes, str]:
    """Build a ``multipart/form-data`` body carrying one file.

    The file is sent in a form field named ``file``; its content type is
    guessed from the file name and defaults to ``application/octet-stream``.

    Returns:
        ``(body, boundary)`` where ``boundary`` is the generated delimiter.
    """
    source = Path(path)
    stamp = int(time.time() * 1000)
    boundary = f"----common-tool-{stamp}-{rand()}"
    guessed = mimetypes.guess_type(source.name)[0]
    ctype = guessed or "application/octet-stream"
    head = f'--{boundary}\r\nContent-Disposition: form-data; name="file"; filename="{source.name}"\r\nContent-Type: {ctype}\r\n\r\n'.encode()
    content = source.read_bytes()
    tail = f"\r\n--{boundary}--\r\n".encode()
    return b"".join((head, content, tail)), boundary
def upload_file(path: str, base_backend_url: str = BACKEND, username: str = "admin", password: str = "admin@jpai.com") -> str:
    """Log in to the backend and upload the file at ``path``.

    Returns:
        The ``data`` field of the upload response.

    Raises:
        RuntimeError: if the login reply carries no access token, or the
            upload reply carries no ``data`` value.
    """
    credentials = {"username": username, "password": password}
    login_reply = json.loads(post_json(f"{base_backend_url}/admin-api/system/auth/login", credentials))
    token = (login_reply.get("data") or {}).get("accessToken")
    if not token:
        raise RuntimeError("后端登录异常")
    body, boundary = multipart(path)
    request = urllib.request.Request(
        f"{base_backend_url}/admin-api/infra/file/upload",
        data=body,
        headers={"Content-Type": f"multipart/form-data; boundary={boundary}", "Authorization": token},
        method="POST",
    )
    with urllib.request.urlopen(request, timeout=120) as resp:
        text = resp.read().decode("utf-8", errors="replace")
    uploaded = json.loads(text).get("data")
    if not uploaded:
        raise RuntimeError(f"上传{path}失败 Response text: {text}")
    return uploaded
def fastgpt_openai_chat(url: str, token: str, model: str, chat_id: str, file_url: str, text: str, stream: bool = True) -> str:
    """Send one user message (a file URL plus text) to a chat-completions endpoint.

    Args:
        url: Endpoint URL.
        token: Bearer token placed in the ``Authorization`` header.
        model: Model name placed in the request body.
        chat_id: Value sent as ``chatId``.
        file_url: URL of the file attached to the message.
        text: Text part of the message.
        stream: When true, the reply is read line by line and the ``delta``
            contents are concatenated; otherwise the single JSON reply's
            ``message.content`` is returned.

    Returns:
        The reply text, or ``""`` when the reply carries no content.
    """

    def first_choice(payload: Any) -> dict:
        # Tolerate replies whose "choices" is missing, empty or malformed.
        choices = payload.get("choices") if isinstance(payload, dict) else None
        if isinstance(choices, list) and choices and isinstance(choices[0], dict):
            return choices[0]
        return {}

    data = {"chatId": chat_id, "messages": [{"role": "user", "content": [{"type": "file_url", "name": "文件", "url": file_url}, {"type": "text", "text": text}]}], "model": model, "stream": stream}
    req = urllib.request.Request(url, data=json.dumps(data, ensure_ascii=False).encode(), headers={"Content-Type": "application/json", "Authorization": f"Bearer {token}"}, method="POST")
    # NOTE(review): urlopen's timeout is in seconds, so 60000 is ~16.7 hours;
    # confirm whether milliseconds were intended.
    with urllib.request.urlopen(req, timeout=60000) as resp:
        if not stream:
            rsp = json.loads(resp.read().decode("utf-8", errors="replace"))
            message = first_choice(rsp).get("message")
            content = message.get("content") if isinstance(message, dict) else None
            return content or ""
        pieces = []
        for raw in resp:
            line = raw.decode("utf-8", errors="replace").strip()
            if line.startswith("data:"):
                # Accept both "data: {...}" and "data:{...}".
                line = line[5:].strip()
            if not line or line == "[DONE]":
                continue
            try:
                event = json.loads(line)
            except ValueError:
                # Non-JSON lines are skipped, as before.
                continue
            delta = first_choice(event).get("delta")
            piece = delta.get("content") if isinstance(delta, dict) else None
            if isinstance(piece, str):
                pieces.append(piece)
        return "".join(pieces)
def main() -> int:
    """Parse the command line and run the selected file/FastGPT utility.

    Returns:
        Process exit status: ``0`` on success, ``1`` when a download fails
        (``download_file`` returned ``None``).
    """
    parser = argparse.ArgumentParser(description="File/FastGPT utilities")
    sub = parser.add_subparsers(dest="cmd", required=True)

    a = sub.add_parser("url-replace-fastgpt")
    a.add_argument("origin")
    a.add_argument("--base-fastgpt-url", default=FASTGPT)

    a = sub.add_parser("download")
    a.add_argument("url")
    a.add_argument("path")
    a.add_argument("--base-fastgpt-url", default=FASTGPT)
    a.add_argument("--base-backend-url", default=BACKEND)
    a.add_argument("--outer-backend-url", default=OUTER)
    a.add_argument("--no-input-url-to-inner", action="store_true")

    a = sub.add_parser("upload")
    a.add_argument("path")
    a.add_argument("--base-backend-url", default=BACKEND)
    a.add_argument("--username", default="admin")
    a.add_argument("--password", default="admin@jpai.com")

    a = sub.add_parser("fastgpt-chat")
    a.add_argument("--url", required=True)
    a.add_argument("--token", required=True)
    a.add_argument("--model", required=True)
    a.add_argument("--chat-id", required=True)
    a.add_argument("--file-url", required=True)
    a.add_argument("--text", required=True)
    a.add_argument("--no-stream", action="store_true")

    x = parser.parse_args()
    if x.cmd == "url-replace-fastgpt":
        print(url_replace_fastgpt(x.origin, x.base_fastgpt_url))
    elif x.cmd == "download":
        saved = download_file(x.url, x.path, not x.no_input_url_to_inner, x.base_fastgpt_url, x.base_backend_url, x.outer_backend_url)
        if saved is None:
            # The failure was already reported on stderr; signal it through
            # the exit status instead of printing "None" and exiting 0.
            return 1
        print(saved)
    elif x.cmd == "upload":
        print(upload_file(x.path, x.base_backend_url, x.username, x.password))
    elif x.cmd == "fastgpt-chat":
        print(fastgpt_openai_chat(x.url, x.token, x.model, x.chat_id, x.file_url, x.text, not x.no_stream))
    return 0
if __name__ == "__main__":
raise SystemExit(main())
#!/usr/bin/env python3
"""Standalone JSON/text utility CLI."""
from __future__ import annotations
import argparse, json, random, re, string, sys
from datetime import datetime
from pathlib import Path
from typing import Any
MIN_SIZE, MAX_SIZE, MAX_PAGE = 2000, 100000, 10
def random_str(n: int = 5) -> str:
    """Return ``n`` random lowercase ASCII letters.

    Up to 26 letters are sampled without repetition; longer requests draw
    each letter independently, so repeats are possible.
    """
    alphabet = string.ascii_lowercase
    if n > 26:
        chars = [random.choice(alphabet) for _ in range(n)]
    else:
        chars = random.sample(alphabet, n)
    return "".join(chars)
def format_now() -> str:
    """Return the current local time formatted as ``YYYY-MM-DD HH:MM:SS``."""
    current = datetime.now()
    return current.strftime("%Y-%m-%d %H:%M:%S")
def extract_url_file(url: str, formats: list[str]) -> str:
    """Find the first file name in ``url`` that ends with one of ``formats``.

    A file name is a run of CJK characters, parentheses, digits, word
    characters or hyphens immediately followed by one of the given suffixes.

    Raises:
        RuntimeError: if no such file name occurs in ``url``.
    """
    name_chars = r"[\u4e00-\u9fa5()()0-9\w-]+"
    alternatives = [name_chars + re.escape(suffix) for suffix in formats]
    found = re.search("|".join(alternatives), url)
    if found is None:
        raise RuntimeError(f"{formats} not found in url:{url}")
    return found.group()
def adjust_single_chunk_size(length: int, max_page: int = MAX_PAGE, min_size: int = MIN_SIZE, max_size: int = MAX_SIZE) -> int:
    """Compute a chunk size for a text of ``length`` characters.

    The size is ``length // max_page``, capped at ``max_size`` and then
    raised to at least ``min_size``.
    """
    per_page = length // max_page
    capped = min(per_page, max_size)
    return max(min_size, capped)
def _loads(text: str) -> Any:
    """Parse JSON text, preferring ``json_repair`` when it is importable.

    Falls back to the standard ``json.loads`` only when importing
    ``json_repair`` fails.
    """
    try:
        import json_repair as repair  # type: ignore
        return repair.loads(text, strict=False)
    except ImportError:
        return json.loads(text)
def extract_json(text: str) -> list[Any]:
    """Collect the JSON values embedded in ``text``.

    Sources are tried in order, stopping at the first stage that yields
    something:

    1. every ```` ```json ```` fenced block (all parseable blocks are kept);
    2. the whole text;
    3. any fenced block (the first parseable one wins);
    4. every non-greedy ``{...}`` or ``[...]`` fragment.

    A parsed list contributes its elements; any other value is appended as a
    single item.
    """
    source = text or ""
    collected: list[Any] = []

    def try_collect(candidate: str) -> bool:
        # Strip control characters before parsing.
        cleaned = re.sub(r"[\x00-\x08\x0b\x0c\x0e-\x1f\x7f]", "", (candidate or "").strip())
        if not cleaned:
            return False
        try:
            parsed = _loads(cleaned)
        except Exception:
            return False
        if isinstance(parsed, list):
            collected.extend(parsed)
        else:
            collected.append(parsed)
        return True

    for fenced in re.findall(r"```json([\s\S]*?)```", source, re.DOTALL):
        try_collect(fenced)
    if collected:
        return collected
    if try_collect(source):
        return collected
    for fenced in re.findall(r"```([\s\S]*?)```", source, re.DOTALL):
        if try_collect(fenced):
            return collected
    for fragment in re.findall(r"(\{[\s\S]*?\}|\[[\s\S]*?\])", source, re.DOTALL):
        try_collect(fragment)
    return collected
def remove_duplicates_by_key(items: list[dict[str, Any]], key: str) -> list[dict[str, Any]]:
    """Drop items whose ``key`` text is contained in an already kept item's text.

    Items are visited from the longest ``key`` text to the shortest, so the
    result is ordered by descending text length and a shorter text that is a
    substring of a longer one is discarded.
    """

    def value_of(entry: dict[str, Any]) -> str:
        return str(entry.get(key, ""))

    kept = []
    kept_values = []
    longest_first = sorted(items, key=lambda entry: len(value_of(entry)), reverse=True)
    for entry in longest_first:
        value = value_of(entry)
        if any(value in earlier for earlier in kept_values):
            continue
        kept_values.append(value)
        kept.append(entry)
    return kept
def group_chunk_by_len(items: list[dict[str, Any]], key: str, chunk_len: int) -> list[list[dict[str, Any]]]:
    """Split ``items`` into consecutive groups by accumulated ``key`` text length.

    A new group is started when adding the next item would push the running
    length past ``chunk_len``; a single oversized item still gets its own
    group.
    """
    batches = []
    batch = []
    used = 0
    for entry in items:
        size = len(str(entry.get(key, "")))
        if batch and used + size > chunk_len:
            batches.append(batch)
            batch = []
            used = 0
        batch.append(entry)
        used += size
    if batch:
        batches.append(batch)
    return batches
def read_json_arg(value: str) -> Any:
    """Load JSON from a file path or from an inline JSON string.

    If ``value`` names an existing path, that file is read as UTF-8 JSON;
    otherwise ``value`` itself is parsed as JSON.

    Raises:
        json.JSONDecodeError: if the chosen text is not valid JSON.
    """
    try:
        is_path = Path(value).exists()
    except (OSError, ValueError):
        # An inline JSON literal is not a usable path: probing it can fail
        # with e.g. "File name too long" instead of simply reporting False.
        is_path = False
    if is_path:
        return json.loads(Path(value).read_text(encoding="utf-8"))
    return json.loads(value)
def main() -> int:
    """Parse the command line and run the selected JSON/text utility.

    Commands that take an optional ``text`` argument read standard input when
    it is omitted.  Always returns ``0``.
    """
    parser = argparse.ArgumentParser(description="JSON/text utilities")
    sub = parser.add_subparsers(dest="cmd", required=True)

    a = sub.add_parser("random-str")
    a.add_argument("-l", "--length", type=int, default=5)

    sub.add_parser("format-now")

    a = sub.add_parser("extract-url-file")
    a.add_argument("url")
    a.add_argument("formats", nargs="+")

    a = sub.add_parser("adjust-single-chunk-size")
    a.add_argument("all_text_len", type=int)
    a.add_argument("--max-chunk-page", type=int, default=MAX_PAGE)
    a.add_argument("--min-single-chunk-size", type=int, default=MIN_SIZE)
    a.add_argument("--max-single-chunk-size", type=int, default=MAX_SIZE)

    a = sub.add_parser("extract-json")
    a.add_argument("text", nargs="?")

    a = sub.add_parser("remove-duplicates-by-key")
    a.add_argument("json_list")
    a.add_argument("key")

    a = sub.add_parser("extract-drop-json-part")
    a.add_argument("text", nargs="?")

    a = sub.add_parser("group-chunk-by-len")
    a.add_argument("json_list")
    a.add_argument("key")
    a.add_argument("chunk_len", type=int)

    x = parser.parse_args()

    def show_json(value: Any) -> None:
        print(json.dumps(value, ensure_ascii=False, indent=2))

    def text_or_stdin() -> str:
        return x.text if x.text is not None else sys.stdin.read()

    cmd = x.cmd
    if cmd == "random-str":
        print(random_str(x.length))
    elif cmd == "format-now":
        print(format_now())
    elif cmd == "extract-url-file":
        print(extract_url_file(x.url, x.formats))
    elif cmd == "adjust-single-chunk-size":
        print(adjust_single_chunk_size(x.all_text_len, x.max_chunk_page, x.min_single_chunk_size, x.max_single_chunk_size))
    elif cmd == "extract-json":
        show_json(extract_json(text_or_stdin()))
    elif cmd == "remove-duplicates-by-key":
        show_json(remove_duplicates_by_key(read_json_arg(x.json_list), x.key))
    elif cmd == "extract-drop-json-part":
        # Remove every ```json fenced block and print what is left.
        print(re.sub(r"```json([\s\S]*?)```", "", text_or_stdin(), flags=re.DOTALL).strip())
    elif cmd == "group-chunk-by-len":
        show_json(group_chunk_by_len(read_json_arg(x.json_list), x.key, x.chunk_len))
    return 0
if __name__ == "__main__":
raise SystemExit(main())
#!/usr/bin/env python3
"""Standalone contract review orchestration CLI."""
from __future__ import annotations
import argparse, json, subprocess, sys
from pathlib import Path
from typing import Any
ROOT = Path(__file__).resolve().parents[3]
DOC = ROOT / "skills/doc-excel-skill/scripts/doc_tool.py"
XLS = ROOT / "skills/doc-excel-skill/scripts/excel_tool.py"
LLM = ROOT / "skills/review-llm-skill/scripts/review_llm_skill.py"
COLS = {"id": "ID", "title": "审查项", "rule": "审查规则", "level": "风险等级", "triggers": "触发词", "suggestion_template": "建议模板", "case": "案例", "summary": "摘要项"}
def sh(args: list[str], text: bool = False) -> Any:
    """Run the current Python interpreter with ``args`` and return its stdout.

    Args:
        args: Arguments placed after ``sys.executable``.
        text: When true, return stdout as a string; otherwise parse it as
            JSON (empty output parses to ``None``).

    Raises:
        subprocess.CalledProcessError: if the child exits with a non-zero status.
    """
    command = [sys.executable]
    command.extend(args)
    stdout = subprocess.check_output(command, text=True)
    if text:
        return stdout
    return json.loads(stdout or "null")
def dump(path: Path, data: Any) -> str:
    """Write ``data`` to ``path`` as indented UTF-8 JSON and return the path.

    Missing parent directories are created first.
    """
    parent = path.parent
    parent.mkdir(parents=True, exist_ok=True)
    serialized = json.dumps(data, ensure_ascii=False, indent=2)
    path.write_text(serialized, encoding="utf-8")
    return str(path)
def norm_rules(rows: list[dict[str, Any]]) -> list[dict[str, Any]]:
    """Rename spreadsheet columns to the internal rule field names in ``COLS``.

    Non-dict rows are skipped; a column missing from a row becomes ``""``.
    """
    normalized = []
    for row in rows:
        if not isinstance(row, dict):
            continue
        normalized.append({field: row.get(column, "") for field, column in COLS.items()})
    return normalized
def titles(items: list[dict[str, Any]]) -> list[str]:
    """Return the non-empty, stripped titles of ``items``.

    Each item's ``title`` is used, falling back to ``rule_title``.
    """
    names = []
    for item in items:
        name = str(item.get("title") or item.get("rule_title") or "").strip()
        if name:
            names.append(name)
    return names
def pick_rules(all_rules: list[dict[str, Any]], selected: list[str]) -> list[dict[str, Any]]:
    """Return the rules whose ``title`` is in ``selected``.

    An empty ``selected`` returns ``all_rules`` itself, unfiltered.
    """
    if not selected:
        return all_rules
    wanted = set(selected)
    chosen = []
    for rule in all_rules:
        if rule.get("title") in wanted:
            chosen.append(rule)
    return chosen
def llm(tool: str, **kw: Any) -> dict[str, Any]:
    """Invoke the review-LLM script for ``tool`` and return its parsed JSON output.

    Each keyword argument becomes a ``--flag value`` pair (underscores turned
    into hyphens).  Dicts and lists are passed as JSON text, other values via
    ``str``; arguments whose value is ``None`` are omitted.
    """
    args = [str(LLM), tool]
    for key, value in kw.items():
        if value is None:
            continue
        flag = "--" + key.replace("_", "-")
        if isinstance(value, (dict, list)):
            rendered = json.dumps(value, ensure_ascii=False)
        else:
            rendered = str(value)
        args.extend([flag, rendered])
    return sh(args)
def route_segment(text: str, rules: list[dict[str, Any]], party_role: str, mode: str) -> tuple[list[str], list[str], list[dict[str, Any]]]:
    """Decide which rules apply to one segment.

    With ``mode == "none"`` every rule is used.  Otherwise the ``router``
    tool is asked, and the rules matching the titles it returned are kept via
    ``pick_rules``.

    Returns:
        ``(rule titles, sorted distinct summary names, rules)`` for the
        chosen rules.
    """

    def summary_names(chosen: list[dict[str, Any]]) -> list[str]:
        return sorted({r.get("summary", "") for r in chosen if r.get("summary")})

    if mode == "none":
        return titles(rules), summary_names(rules), rules
    reply = llm("router", segment_text=text, rules=rules, party_role=party_role)
    picked = reply.get("selected_items") or reply.get("routed_rules") or []
    routed = pick_rules(rules, titles(picked))
    summaries = summary_names(routed)
    return titles(routed), summaries, routed
def reflect_findings(rules: list[dict[str, Any]], facts: list[dict[str, Any]], findings: list[dict[str, Any]], party_role: str) -> list[dict[str, Any]]:
    """Re-check findings rule by rule with the ``reflect`` tool.

    For every rule that has findings (matched on ``rule_title``), the tool
    receives the rule, those findings, and the facts stored under the rule's
    ``summary`` name.  Rules without findings are skipped.

    Returns:
        The concatenated ``final_findings`` (or ``findings``) of all replies.
    """
    reviewed: list[dict[str, Any]] = []
    for rule in rules:
        title = rule.get("title", "")
        matching = [f for f in findings if f.get("rule_title") == title]
        if not matching:
            continue
        summary = rule.get("summary", "")
        scoped_facts = []
        if summary:
            for fact in facts:
                if isinstance(fact, dict) and summary in fact:
                    scoped_facts.append({summary: fact.get(summary)})
        reply = llm("reflect", rule=rule, findings=matching, facts=scoped_facts, party_role=party_role)
        reviewed.extend(reply.get("final_findings") or reply.get("findings") or [])
    return reviewed
def merge_by_segment(findings: list[dict[str, Any]]) -> list[dict[str, Any]]:
    """Merge the failing findings of each segment with the ``merger`` tool.

    Segments are processed in ascending ``segment_id`` order (a missing or
    falsy id counts as 0).  Only findings whose stripped ``result`` equals
    ``"不合格"`` are sent; segments without any are skipped.  Merged findings
    lacking a ``segment_id`` receive the segment's id.
    """

    def segment_of(finding: dict[str, Any]) -> int:
        return int(finding.get("segment_id", 0) or 0)

    merged_all: list[dict[str, Any]] = []
    for seg in sorted({segment_of(f) for f in findings}):
        failing = []
        for finding in findings:
            if segment_of(finding) == seg and str(finding.get("result", "")).strip() == "不合格":
                failing.append(finding)
        if not failing:
            continue
        reply = llm("merger", payload=failing)
        merged = reply.get("findings") or []
        for item in merged:
            item.setdefault("segment_id", seg)
        merged_all.extend(merged)
    return merged_all
def run(file: Path, rules_path: Path, ruleset: str, out_dir: Path, party_role: str, route_by: str, reflect: bool, merge_mode: str, max_chunks: int, dry_run: bool) -> dict[str, Any]:
    """Run the whole review flow for one contract file.

    Steps: load the document and the rule sheet, then for every chunk route
    rules, extract facts and review; afterwards optionally reflect, merge
    findings per segment, merge facts, write ``memory.json`` and export
    ``review.xlsx`` (plus ``commented.docx`` for ``.docx`` input).

    Args:
        file: Contract document.
        rules_path: Rule workbook.
        ruleset: Sheet name inside the rule workbook.
        out_dir: Directory receiving all outputs (created if missing).
        party_role: Passed through to the LLM tools.
        route_by: ``"none"`` disables routing; any other value uses the router.
        reflect: Whether to run the reflect step.
        merge_mode: Passed to the ``fact-merge`` tool.
        max_chunks: When non-zero, only the first ``max_chunks`` chunks are used.
        dry_run: When true, only ``memory.json`` with the initial state is written.

    Returns:
        The memory dict that was written to ``memory.json``.
    """
    out_dir.mkdir(parents=True, exist_ok=True)
    info = sh([str(DOC), "doc-load", str(file)])
    rows = sh([str(XLS), "load-excel", str(rules_path), "--sheet-name", ruleset])
    rules = norm_rules(rows)
    chunk_ids = info.get("chunk_ids", [])
    if max_chunks:
        chunk_ids = chunk_ids[:max_chunks]
    memory = {
        "file": str(file),
        "ruleset": ruleset,
        "segment_ids": [i + 1 for i in chunk_ids],
        "rule_titles": titles(rules),
        "summary_names": sorted({r.get("summary", "") for r in rules if r.get("summary")}),
        "facts": [],
        "merge_facts": [],
        "findings": {"review": [], "reflect": [], "merge": []},
    }
    memory_file = out_dir / "memory.json"
    if dry_run:
        dump(memory_file, memory)
        return memory

    for cid in chunk_ids:
        text = sh([str(DOC), "doc-chunk", str(file), str(cid)], text=True)
        routed_titles, routed_summaries, routed_rules = route_segment(text, rules, party_role, route_by)
        summary = llm("summary", segment_text=text, rules=routed_rules, party_role=party_role)
        fact = summary.get("facts", summary)
        if isinstance(fact, dict):
            memory["facts"].append(fact)
        review = llm("review", segment_text=text, rules=routed_rules, party_role=party_role)
        for finding in review.get("findings", []):
            # NOTE(review): findings default to ``cid`` while routes and
            # ``segment_ids`` use ``cid + 1`` — confirm which numbering the
            # downstream export/comment tools expect.
            finding.setdefault("segment_id", cid)
            memory["findings"]["review"].append(finding)
        memory.setdefault("routes", []).append({"segment_id": cid + 1, "routed_rule_titles": routed_titles, "routed_summary_names": routed_summaries})

    if reflect:
        memory["findings"]["reflect"] = reflect_findings(rules, memory["facts"], memory["findings"]["review"], party_role)
    source = memory["findings"]["reflect"] or memory["findings"]["review"]
    memory["findings"]["merge"] = merge_by_segment(source)
    fact_res = llm("fact-merge", facts=memory["facts"], summary_names=memory["summary_names"], merge_mode=merge_mode)
    memory["merge_facts"] = [fact_res.get("merge_facts", {})]

    memory_ref = "@" + dump(memory_file, memory)
    # Export the most refined non-empty finding list: merge, then reflect, then review.
    if memory["findings"]["merge"]:
        finding_key = "merge"
    elif memory["findings"]["reflect"]:
        finding_key = "reflect"
    else:
        finding_key = "review"
    sh([str(XLS), "export-findings-excel", memory_ref, str(out_dir / "review.xlsx"), "--finding-key", finding_key], text=True)
    if file.suffix.lower() == ".docx":
        sh([str(DOC), "docx-add-comments", str(file), memory_ref, str(out_dir / "commented.docx"), "--finding-key", finding_key], text=True)
    return memory
def main() -> int:
    """Parse the command line, run the review flow and print the output directory.

    Always returns ``0``; failures surface as exceptions from ``run``.
    """
    parser = argparse.ArgumentParser(description="Contract review flow orchestrator")
    parser.add_argument("file")
    parser.add_argument("--rules", default=str(ROOT / "data/rules.xlsx"))
    parser.add_argument("--ruleset", default="通用")
    parser.add_argument("--out-dir", default="outputs/review-flow")
    parser.add_argument("--party-role", default="")
    parser.add_argument("--route-by", choices=["rule", "none"], default="rule")
    parser.add_argument("--no-reflect", action="store_true")
    parser.add_argument("--merge-mode", choices=["llm", "rule"], default="rule")
    parser.add_argument("--max-chunks", type=int, default=0)
    parser.add_argument("--dry-run", action="store_true")
    opts = parser.parse_args()
    run(
        Path(opts.file),
        Path(opts.rules),
        opts.ruleset,
        Path(opts.out_dir),
        opts.party_role,
        opts.route_by,
        not opts.no_reflect,
        opts.merge_mode,
        opts.max_chunks,
        opts.dry_run,
    )
    print("输出目录:", Path(opts.out_dir).resolve())
    return 0
if __name__ == "__main__":
raise SystemExit(main())
#!/usr/bin/env python3
"""Standalone Excel CLI for rule tables and review exports."""
from __future__ import annotations
import argparse
import csv
import json
import string
import zipfile
from pathlib import Path
from typing import Any
from xml.etree import ElementTree as ET
NS = {"a": "http://schemas.openxmlformats.org/spreadsheetml/2006/main", "r": "http://schemas.openxmlformats.org/officeDocument/2006/relationships", "rel": "http://schemas.openxmlformats.org/package/2006/relationships"}
class ExcelLoadError(Exception):
    """Raised when a spreadsheet cannot be loaded with the available libraries."""
def _json(v: Any) -> None:
    """Print ``v`` to stdout as indented JSON, keeping non-ASCII characters."""
    rendered = json.dumps(v, ensure_ascii=False, indent=2)
    print(rendered)
def _load_json(v: str) -> Any:
    """Parse a JSON argument.

    A value starting with ``@`` names a UTF-8 file whose content is parsed;
    any other value is parsed directly.
    """
    if v.startswith("@"):
        v = Path(v[1:]).read_text(encoding="utf-8")
    return json.loads(v)
def _col_idx(ref: str) -> int:
    """Convert a cell reference such as ``"AB12"`` to a zero-based column index.

    Only the ASCII letters of ``ref`` are considered; a reference without
    letters maps to column 0.
    """
    letters = [c for c in ref if c in string.ascii_letters]
    number = 0
    for letter in letters:
        # Base-26 with A=1 ... Z=26.
        number = number * 26 + (ord(letter.upper()) - 64)
    return max(number - 1, 0)
def _rows_to_result(rows: list, header: bool) -> list:
    """Shape raw rows into the loader's result format.

    Without ``header`` every row becomes a list.  With ``header`` the first
    row supplies the (stripped) column names and each later row becomes a
    dict; cells beyond the header use ``col<index>`` as their key.
    """
    if not rows:
        return []
    if not header:
        return [list(r) for r in rows]
    heads = ["" if h is None else str(h).strip() for h in rows[0]]
    records = []
    for row in rows[1:]:
        record = {}
        for i, value in enumerate(row):
            name = heads[i] if i < len(heads) else f"col{i}"
            record[name] = value
        records.append(record)
    return records
def _sheet_map(zf: zipfile.ZipFile) -> list[tuple[str, str]]:
    """List the workbook's sheets as ``(sheet name, archive member path)`` pairs.

    Sheet names come from ``xl/workbook.xml``; each sheet's relationship id
    is resolved through ``xl/_rels/workbook.xml.rels``.
    """
    wb = ET.fromstring(zf.read("xl/workbook.xml"))
    rels = ET.fromstring(zf.read("xl/_rels/workbook.xml.rels"))
    rel_map = {r.attrib["Id"]: r.attrib["Target"] for r in rels.findall("rel:Relationship", NS)}
    rid_attr = f"{{{NS['r']}}}id"
    out = []
    for s in wb.findall(".//a:sheets/a:sheet", NS):
        target = rel_map.get(s.attrib.get(rid_attr, ""), "")
        if target.startswith("/"):
            # Absolute target (e.g. "/xl/worksheets/sheet1.xml") is already
            # rooted at the package; prefixing "xl/" would yield "xl/xl/...".
            part = target.lstrip("/")
        elif target.startswith("xl/"):
            part = target
        else:
            # Relative targets are resolved against the workbook's folder.
            part = "xl/" + target
        out.append((s.attrib.get("name", ""), part))
    return out
def _shared(zf: zipfile.ZipFile) -> list[str]:
    """Read the shared-strings table of an xlsx archive.

    Returns an empty list when the archive has no ``xl/sharedStrings.xml``.
    Each entry is the concatenation of all text nodes of one string item.
    """
    try:
        root = ET.fromstring(zf.read("xl/sharedStrings.xml"))
    except KeyError:
        return []
    strings = []
    for item in root.findall(".//a:si", NS):
        pieces = [t.text or "" for t in item.findall(".//a:t", NS)]
        strings.append("".join(pieces))
    return strings
def _load_std_xlsx(path: Path, sheet: str | None, header: bool) -> list:
    """Load one worksheet of an xlsx file using only the standard library.

    The sheet named ``sheet`` is read, falling back to the first sheet when
    no name matches.  Shared-string cells are resolved to their text; other
    cells keep the raw ``<v>`` text (``None`` when absent).  Gaps before a
    cell's column are filled with ``None``.
    """
    with zipfile.ZipFile(path) as zf:
        shared = _shared(zf)
        sheets = _sheet_map(zf)
        if not sheets:
            return []
        sheet_path = sheets[0][1]
        for name, part in sheets:
            if name == sheet:
                sheet_path = part
                break
        root = ET.fromstring(zf.read(sheet_path))
        rows = []
        for row_el in root.findall(".//a:sheetData/a:row", NS):
            values = []
            for cell in row_el.findall("a:c", NS):
                column = _col_idx(cell.attrib.get("r", ""))
                while len(values) < column:
                    values.append(None)
                value_el = cell.find("a:v", NS)
                raw = value_el.text if value_el is not None else None
                is_shared = cell.attrib.get("t") == "s" and raw is not None and int(raw) < len(shared)
                values.append(shared[int(raw)] if is_shared else raw)
            rows.append(values)
        return _rows_to_result(rows, header)
def load_excel(path: str, sheet: str | None = None, header: bool = True) -> list:
    """Load a table from a CSV/TSV file or a spreadsheet.

    Args:
        path: File to read.  ``.csv``/``.tsv`` are parsed with ``csv``;
            everything else is opened with openpyxl when available.
        sheet: Worksheet name; the active (openpyxl) or first (fallback)
            sheet is used when omitted.
        header: Whether the first row holds column names.

    Returns:
        Rows as produced by ``_rows_to_result``.

    Raises:
        ExcelLoadError: if openpyxl is missing and the file is not ``.xlsx``.
    """
    p = Path(path)
    suffix = p.suffix.lower()
    if suffix in {".csv", ".tsv"}:
        delimiter = "\t" if suffix == ".tsv" else ","
        with p.open(newline="", encoding="utf-8-sig", errors="replace") as f:
            return _rows_to_result(list(csv.reader(f, delimiter=delimiter)), header)
    try:
        import openpyxl  # type: ignore
    except ImportError:
        if suffix != ".xlsx":
            raise ExcelLoadError("openpyxl is required for non-xlsx files")
        return _load_std_xlsx(p, sheet, header)
    wb = openpyxl.load_workbook(p, data_only=True, read_only=True)
    try:
        ws = wb[sheet] if sheet else wb.active
        return _rows_to_result(list(ws.iter_rows(values_only=True)), header)
    finally:
        # Read-only workbooks keep the file open until closed explicitly.
        wb.close()
def list_sheets(path: str) -> list[str]:
    """Return the worksheet names of a workbook.

    openpyxl is used when importable; otherwise the names are read directly
    from the xlsx archive via ``_sheet_map``.
    """
    try:
        import openpyxl  # type: ignore
    except ImportError:
        with zipfile.ZipFile(path) as zf:
            return [n for n, _ in _sheet_map(zf)]
    wb = openpyxl.load_workbook(path, read_only=True)
    try:
        return wb.sheetnames
    finally:
        # Read-only workbooks keep the file open until closed explicitly.
        wb.close()
def _norm_findings(v: Any, key: str | None = None) -> list[dict]:
    """Flatten the accepted finding containers into a list of finding dicts.

    Accepted shapes:

    * a list: its dict elements are returned;
    * a dict with ``findings``: that value is normalised (when it is a dict
      and ``key`` is given, only ``findings[key]`` is used);
    * a dict containing ``key``: that value is normalised;
    * a dict with ``rule_title``, ``issue`` or ``suggestion``: a single finding;
    * any other dict: all of its values are normalised and concatenated.

    Anything else yields an empty list.
    """
    if isinstance(v, list):
        return [x for x in v if isinstance(x, dict)]
    if not isinstance(v, dict):
        return []
    if "findings" in v:
        inner = v["findings"]
        if key and isinstance(inner, dict):
            inner = inner.get(key)
        return _norm_findings(inner)
    if key and key in v:
        return _norm_findings(v[key])
    if any(k in v for k in ("rule_title", "issue", "suggestion")):
        return [v]
    flattened = []
    for items in v.values():
        flattened.extend(_norm_findings(items))
    return flattened
def _norm_facts(v: Any, key: str) -> list[dict]:
    """Normalise a facts container into a list of fact dicts.

    A list keeps its dict elements.  A dict is unwrapped through ``key`` when
    present (otherwise the dict itself is the value); the result keeps the
    dict elements of a list, or wraps a single dict.  Anything else yields
    an empty list.
    """
    if isinstance(v, list):
        return [x for x in v if isinstance(x, dict)]
    if not isinstance(v, dict):
        return []
    inner = v.get(key, v)
    if isinstance(inner, list):
        return [i for i in inner if isinstance(i, dict)]
    if isinstance(inner, dict):
        return [inner]
    return []
def _cell(v: Any) -> str:
    """Render a value as cell text.

    Dicts and lists become indented JSON, ``None`` becomes an empty string
    and everything else goes through ``str``.
    """
    if isinstance(v, (dict, list)):
        return json.dumps(v, ensure_ascii=False, indent=2)
    if v is None:
        return ""
    return str(v)
def export_excel(findings: Any, out: str, facts: Any = None, merge: Any = None, key: str | None = None) -> str:
    """Write findings and facts to an xlsx workbook and return its path.

    The first sheet (``审查结果``) holds one row per finding; two further
    sheets hold the key/value pairs of ``facts`` and ``merge`` (entries keyed
    ``_meta`` or ``meta`` are left out).  Header rows are bold and all cells
    are top-aligned with wrapping.  Requires openpyxl.
    """
    from openpyxl import Workbook  # type: ignore
    from openpyxl.styles import Alignment, Font  # type: ignore

    wb = Workbook()
    headers = ["ID", "规则标题", "分段ID", "原文", "问题描述", "风险等级", "合格性", "建议"]
    ws = wb.active
    ws.title = "审查结果"
    ws.append(headers)
    for finding in _norm_findings(findings, key):
        ws.append([
            finding.get("id", ""),
            finding.get("rule_title", ""),
            finding.get("segment_id", ""),
            finding.get("original_text", ""),
            finding.get("issue", ""),
            finding.get("risk_level") or finding.get("level", ""),
            finding.get("result", ""),
            finding.get("suggestion", ""),
        ])

    fact_sheets = {
        "合同事实": _norm_facts(facts or {}, "facts"),
        "合并事实": _norm_facts(merge or {}, "merge_facts"),
    }
    for title, rows in fact_sheets.items():
        sheet = wb.create_sheet(title)
        sheet.append(["提取项", "提取内容"])
        for item in rows:
            for name, value in item.items():
                if str(name) in {"_meta", "meta"}:
                    continue
                sheet.append([name, _cell(value)])

    for sheet in wb.worksheets:
        for cell in sheet[1]:
            cell.font = Font(bold=True)
        for row in sheet.iter_rows():
            for cell in row:
                cell.alignment = Alignment(vertical="top", wrap_text=True)

    Path(out).parent.mkdir(parents=True, exist_ok=True)
    wb.save(out)
    return out
def main() -> int:
    """Parse the command line and run the selected Excel command.

    Always returns ``0``; failures surface as exceptions.
    """
    parser = argparse.ArgumentParser(description="Standalone Excel CLI")
    sub = parser.add_subparsers(dest="cmd", required=True)

    a = sub.add_parser("load-excel")
    a.add_argument("file")
    a.add_argument("--sheet-name")
    a.add_argument("--no-header", action="store_true")

    a = sub.add_parser("list-sheets")
    a.add_argument("file")

    a = sub.add_parser("find-value")
    a.add_argument("file")
    a.add_argument("key_column")
    a.add_argument("key_value")
    a.add_argument("value_column")
    a.add_argument("--sheet-name")

    a = sub.add_parser("map-rows")
    a.add_argument("file")
    a.add_argument("column_map")
    a.add_argument("--sheet-name")

    a = sub.add_parser("export-findings-excel")
    a.add_argument("findings")
    a.add_argument("output")
    a.add_argument("--facts")
    a.add_argument("--merge-facts")
    a.add_argument("--finding-key")

    a = sub.add_parser("export-facts-excel")
    a.add_argument("facts")
    a.add_argument("output")
    a.add_argument("--merge-facts")

    x = parser.parse_args()
    cmd = x.cmd
    if cmd == "load-excel":
        _json(load_excel(x.file, x.sheet_name, not x.no_header))
    elif cmd == "list-sheets":
        _json(list_sheets(x.file))
    elif cmd == "find-value":
        # First row whose key column equals the requested value, else null.
        matches = (r.get(x.value_column) for r in load_excel(x.file, x.sheet_name) if isinstance(r, dict) and r.get(x.key_column) == x.key_value)
        _json(next(matches, None))
    elif cmd == "map-rows":
        mapped = []
        for r in load_excel(x.file, x.sheet_name):
            if isinstance(r, dict):
                mapped.append({k: r.get(v) for k, v in json.loads(x.column_map).items()})
        _json(mapped)
    elif cmd == "export-findings-excel":
        print(export_excel(_load_json(x.findings), x.output, _load_json(x.facts) if x.facts else None, _load_json(x.merge_facts) if x.merge_facts else None, x.finding_key))
    elif cmd == "export-facts-excel":
        print(export_excel([], x.output, _load_json(x.facts), _load_json(x.merge_facts) if x.merge_facts else None))
    return 0
if __name__ == "__main__":
raise SystemExit(main())
#!/usr/bin/env python3
"""Standalone HTTP upload/download CLI."""
from __future__ import annotations
import argparse, json, mimetypes, random, re, string, sys, time, urllib.error, urllib.request
from pathlib import Path
from urllib.parse import unquote, urlparse
DEFAULT_OUTER_BACKEND_URL = "https://172.21.107.45:48080"
DEFAULT_BASE_FASTGPT_URL = "http://172.21.107.45:3030"
DEFAULT_BASE_BACKEND_URL = "http://172.21.107.45:1122"
DEFAULT_BACKEND_ADMIN_USERNAME = "admin"
DEFAULT_BACKEND_ADMIN_PASSWORD = "admin@jpai.com"
base_fastgpt_url, base_backend_url, outer_backend_url = DEFAULT_BASE_FASTGPT_URL, DEFAULT_BASE_BACKEND_URL, DEFAULT_OUTER_BACKEND_URL
backend_admin_username, backend_admin_password = DEFAULT_BACKEND_ADMIN_USERNAME, DEFAULT_BACKEND_ADMIN_PASSWORD
def configure_urls(fastgpt_url: str | None = None, backend_url: str | None = None, outer_url: str | None = None) -> None:
    """Override the module-level service URLs.

    Each argument replaces its module global only when it is truthy; a
    ``None`` or empty value leaves the current setting in place.
    """
    global base_fastgpt_url, base_backend_url, outer_backend_url
    if fastgpt_url:
        base_fastgpt_url = fastgpt_url
    if backend_url:
        base_backend_url = backend_url
    if outer_url:
        outer_backend_url = outer_url
def configure_login(username: str | None = None, password: str | None = None) -> None:
    """Override the module-level backend credentials.

    Each argument replaces its module global only when it is truthy.
    """
    global backend_admin_username, backend_admin_password
    if username:
        backend_admin_username = username
    if password:
        backend_admin_password = password
def _strip(url: str | None) -> str | None:
    """Remove trailing slashes from ``url``; falsy values are returned as-is."""
    if not url:
        return url
    return url.rstrip("/")
def _random_str(n: int = 8) -> str:
    """Return ``n`` independently drawn random lowercase ASCII letters."""
    letters = []
    for _ in range(n):
        letters.append(random.choice(string.ascii_lowercase))
    return "".join(letters)
def _post_json(url: str, data: dict, timeout: int = 120) -> str:
    """POST ``data`` as a JSON body and return the response text.

    The response is decoded as UTF-8 with undecodable bytes replaced.
    """
    payload = json.dumps(data, ensure_ascii=False).encode()
    request = urllib.request.Request(
        url,
        data=payload,
        headers={"Content-Type": "application/json"},
        method="POST",
    )
    with urllib.request.urlopen(request, timeout=timeout) as resp:
        raw = resp.read()
    return raw.decode("utf-8", errors="replace")
def _multipart_body(path: str, field: str = "file") -> tuple[bytes, str]:
    """Build a ``multipart/form-data`` body carrying one file.

    The file is sent in the form field ``field``; its content type is guessed
    from the file name and defaults to ``application/octet-stream``.

    Returns:
        ``(body, boundary)`` where ``boundary`` is the generated delimiter.
    """
    source = Path(path)
    stamp = int(time.time() * 1000)
    boundary = f"----http-skill-{stamp}-{_random_str()}"
    ctype = mimetypes.guess_type(source.name)[0] or "application/octet-stream"
    opening = f"--{boundary}\r\n".encode()
    part_headers = f'Content-Disposition: form-data; name="{field}"; filename="{source.name}"\r\nContent-Type: {ctype}\r\n\r\n'.encode()
    content = source.read_bytes()
    closing = f"\r\n--{boundary}--\r\n".encode()
    return b"".join((opening, part_headers, content, closing)), boundary
def upload_file(path, input_url_to_inner=True, output_url_to_inner=False) -> str:
    """Log in to the configured backend and upload the file at ``path``.

    ``input_url_to_inner`` and ``output_url_to_inner`` are accepted but not
    used by this function.

    Returns:
        The ``data`` field of the upload response.

    Raises:
        RuntimeError: if the login reply carries no access token, or the
            upload reply carries no ``data`` value.
    """
    credentials = {"username": backend_admin_username, "password": backend_admin_password}
    login = _post_json(f"{base_backend_url}/admin-api/system/auth/login", credentials)
    token = (json.loads(login).get("data") or {}).get("accessToken")
    if not token:
        raise RuntimeError(f"后端登录异常:{login}")
    body, boundary = _multipart_body(path)
    request = urllib.request.Request(
        f"{base_backend_url}/admin-api/infra/file/upload",
        data=body,
        headers={"Content-Type": f"multipart/form-data; boundary={boundary}", "Authorization": token},
        method="POST",
    )
    with urllib.request.urlopen(request, timeout=120) as resp:
        text = resp.read().decode("utf-8", errors="replace")
    uploaded = json.loads(text).get("data")
    if not uploaded:
        raise RuntimeError(f"上传{path}失败 Response text: {text}")
    return uploaded
def _basename(name: str) -> str:
    """Extract a bare file name from a header value or URL path.

    Surrounding whitespace and double quotes are removed, percent-escapes are
    decoded and backslashes are treated as path separators.  Falls back to
    ``"downloaded_file"`` when no final path component remains.
    """
    cleaned = name.strip().strip('"')
    decoded = unquote(cleaned)
    leaf = Path(decoded.replace("\\", "/")).name
    if leaf:
        return leaf
    return "downloaded_file"
def _resolve_name(url: str, headers) -> str:
    """Choose a file name for a downloaded resource.

    The ``Content-Disposition`` header is consulted first (the ``filename*=``
    form before the plain ``filename=`` form); when it yields nothing the last
    path component of ``url`` is used.
    """
    disposition = headers.get("content-disposition", "") or headers.get("Content-Disposition", "")
    patterns = (
        r"filename\*=(?:UTF-8''|utf-8'')?([^;]+)",
        r'filename="?([^";]+)"?',
    )
    for pattern in patterns:
        found = re.search(pattern, disposition)
        if found:
            return _basename(found.group(1))
    return _basename(urlparse(url).path)
def download_file(url, path, input_url_to_inner=True):
    """Download ``url`` and store it at ``path``.

    When ``input_url_to_inner`` is true, a URL without an ``http:``/``https:``
    prefix is appended to the configured FastGPT base URL and every
    occurrence of the outer backend URL is replaced with the inner one.  If
    ``path`` is an existing directory the file name comes from
    ``_resolve_name``.

    Returns:
        The saved path as a string, or ``None`` after an HTTP error status
        (a message is written to stderr in that case).
    """
    if input_url_to_inner:
        if not url.startswith(("http:", "https:")):
            url = base_fastgpt_url + url
        url = url.replace(outer_backend_url, base_backend_url)
    request = urllib.request.Request(url, method="GET")
    try:
        with urllib.request.urlopen(request, timeout=120) as resp:
            destination = Path(path)
            if destination.exists() and destination.is_dir():
                destination = destination / _resolve_name(url, resp.headers)
            destination.parent.mkdir(parents=True, exist_ok=True)
            destination.write_bytes(resp.read())
            return str(destination)
    except urllib.error.HTTPError as exc:
        print(f"{url}文件下载失败. HTTP Status Code: {exc.code}", file=sys.stderr)
        return None
def url_replace_fastgpt(origin: str):
    """Turn a FastGPT-relative path into a full URL.

    Values that already start with ``http:`` or ``https:`` are returned
    untouched; anything else is appended to the configured FastGPT base URL.
    """
    if origin.startswith(("http:", "https:")):
        return origin
    return base_fastgpt_url + origin
def add_url_args(p: argparse.ArgumentParser) -> None:
    """Add the three service-URL options, with their defaults, to ``p``."""
    url_options = (
        ("--base-fastgpt-url", DEFAULT_BASE_FASTGPT_URL),
        ("--base-backend-url", DEFAULT_BASE_BACKEND_URL),
        ("--outer-backend-url", DEFAULT_OUTER_BACKEND_URL),
    )
    for flag, default in url_options:
        p.add_argument(flag, default=default)
def build_arg_parser() -> argparse.ArgumentParser:
    """Create the CLI parser with ``upload``, ``download`` and ``normalize-url`` commands.

    Every command also accepts the service-URL options from ``add_url_args``.
    """
    parser = argparse.ArgumentParser(description="上传、下载或补全 FastGPT/后端文件 URL。")
    sub = parser.add_subparsers(dest="command", required=True)

    upload = sub.add_parser("upload")
    add_url_args(upload)
    upload.add_argument("--username", default=DEFAULT_BACKEND_ADMIN_USERNAME)
    upload.add_argument("--password", default=DEFAULT_BACKEND_ADMIN_PASSWORD)
    upload.add_argument("path")

    download = sub.add_parser("download")
    add_url_args(download)
    download.add_argument("url")
    download.add_argument("path")

    normalize = sub.add_parser("normalize-url")
    add_url_args(normalize)
    normalize.add_argument("url")

    return parser
def main(argv: list[str] | None = None) -> int:
    """Run the file-utility CLI and return a process exit code.

    The base URLs given on the command line are passed through ``_strip`` and
    applied via ``configure_urls`` before the selected command runs.
    Returns 0 on success and 1 when a download yields no saved file.
    """
    cli = build_arg_parser()
    args = cli.parse_args(argv)
    configure_urls(
        _strip(args.base_fastgpt_url),
        _strip(args.base_backend_url),
        _strip(args.outer_backend_url),
    )
    command = args.command
    if command == "upload":
        configure_login(args.username, args.password)
        print(upload_file(args.path))
        return 0
    if command == "download":
        saved = download_file(args.url, args.path)
        if saved is None:
            return 1
        print(saved)
        return 0
    if command == "normalize-url":
        print(url_replace_fastgpt(args.url))
        return 0
    # argparse's error() exits the process; the return is only a fallback.
    cli.error(f"unsupported command: {command}")
    return 2


if __name__ == "__main__":
    sys.exit(main())
"""Standalone CLI scripts for review-llm-skill."""
"""Compact prompt templates kept for compatibility."""
# Prompt templates keyed by tool name. Each template (except "llm") carries
# named ``{placeholder}`` slots to be filled with segment text, rules, facts, etc.
# NOTE(review): the templates also contain literal JSON braces (for example
# {"findings":[]}) that are not doubled, so a plain ``str.format`` call on
# them raises KeyError; fill the placeholders with a substitution that leaves
# the other braces alone.
PROMPTS = {
"review": "基于当前分段和审查规则审查合同,仅输出JSON:{\"overall_conclusion\":\"\",\"findings\":[]}。\n分段:{segment_text}\n立场:{party_role}\n规则:{ruleset_text}",
"summary": "提取当前分段中与规则字段相关的客观事实,仅输出JSON:{\"facts\":{}}。\n分段:{segment_text}\n字段:{rule_fields}",
"router": "从候选规则中选择当前分段应执行的审查项,仅输出JSON:{\"selected_items\":[]}。\n分段:{segment_text}\n记忆:{context_memories_json}\n立场:{party_role}\n候选:{candidate_rules_json}",
"merger": "合并重复或相关的不合格findings,仅输出JSON:{\"findings\":[]}。\n输入:{payload}",
"reflect": "基于规则、已有findings和facts复核、去重、拆分、合并并定稿,仅输出JSON:{\"final_findings\":[]}。\n规则:{rule}\nfindings:{findings_json}\nfacts:{facts_json}\n立场:{party_role}",
"fact-merge": "合并summary_name下多个分段facts,不新增事实,仅输出JSON:{\"merge_facts\":{}}。\nsummary_names:{summary_names_json}\nfacts:{facts_json}",
"ruleset-route": "从候选ruleset_id中按问题选择一个,不得编造,仅输出JSON:{\"ruleset_id\":\"\",\"reason\":\"\"}。\n候选:{ruleset_ids_json}\n问题:{question}",
"party-role": "分析指定公司在合同中的商业角色,不仅按甲乙方判断,仅输出JSON:{\"party_role\":\"demand_side | supplier_side | unclear\",\"reason\":\"\"}。\n公司:{company_name}\n合同:{contract_text}",
"llm": "你是通用LLM助手。",
}
import os
import re
import json
import urllib.request
from typing import Any, List, Dict
class LLMTool:
def __init__(self, system_prompt: str = ""):
self.system_prompt = system_prompt or ""
self.model = os.environ.get("OPENAI_MODEL", "gpt-4o-mini")
self.base_url = (os.environ.get("OPENAI_BASE_URL") or "https://api.openai.com/v1").rstrip("/")
self.api_key = os.environ.get("OPENAI_API_KEY")
def build_messages(self, user_content: str, system_content: str | None = None) -> List[Dict[str, str]]:
msgs = []
if system_content:
msgs.append({"role": "system", "content": system_content})
msgs.append({"role": "user", "content": user_content})
return msgs
def run(self, messages: List[Dict[str, str]]) -> str:
if not self.api_key:
raise RuntimeError("OPENAI_API_KEY is required")
body = json.dumps({"model": self.model, "messages": messages}, ensure_ascii=False).encode("utf-8")
request = urllib.request.Request(
f"{self.base_url}/chat/completions",
data=body,
headers={
"Content-Type": "application/json",
"Authorization": f"Bearer {self.api_key}",
},
method="POST",
)
with urllib.request.urlopen(request, timeout=120) as response:
payload = json.loads(response.read().decode("utf-8"))
return (((payload.get("choices") or [{}])[0].get("message") or {}).get("content")) or ""
def chat_async(self, messages: List[Dict[str, str]]) -> str:
return self.run(messages)
def run_with_loop(self, chat_response: str) -> str:
return chat_response
def parse_first_json(self, text: str) -> Any:
if not text:
return None
try:
return json.loads(text)
except Exception:
pass
m = re.search(r"(\{.*\}|\[.*\])", text, re.S)
if not m:
return None
blob = m.group(1)
try:
return json.loads(blob)
except Exception:
return None
#!/usr/bin/env python3
"""Standalone review LLM CLI."""
from __future__ import annotations
import argparse, json, os, re, urllib.request
from pathlib import Path
from typing import Any
# Prompt templates keyed by the CLI tool name. Each template (except "llm")
# carries named ``{placeholder}`` slots for segment text, rules, facts, etc.
# NOTE(review): the templates also contain literal JSON braces (for example
# {"findings":[]}) that are not doubled, so a plain ``str.format`` call on
# them raises KeyError; fill the placeholders with a substitution that leaves
# the other braces alone.
PROMPTS = {
"review": "基于当前分段和审查规则审查合同,仅输出JSON:{\"overall_conclusion\":\"\",\"findings\":[]}。\n分段:{segment_text}\n立场:{party_role}\n规则:{ruleset_text}",
"summary": "仅提取当前分段中与规则字段相关的客观事实,仅输出JSON:{\"facts\":{}}。\n分段:{segment_text}\n字段:{rule_fields}",
"router": "从候选规则中选择当前分段应执行的审查项,仅输出JSON:{\"selected_items\":[]}。\n分段:{segment_text}\n记忆:{context_memories_json}\n立场:{party_role}\n候选:{candidate_rules_json}",
"merger": "合并同一分段内重复或相关的不合格findings,仅输出JSON:{\"findings\":[]}。\n输入:{payload}",
"reflect": "基于规则、已有findings和facts复核、去重、拆分、合并并定稿,仅输出JSON:{\"final_findings\":[]}。\n规则:{rule}\nfindings:{findings_json}\nfacts:{facts_json}\n立场:{party_role}",
"fact-merge": "合并summary_name下多个分段facts,不新增事实,仅输出JSON:{\"merge_facts\":{}}。\nsummary_names:{summary_names_json}\nfacts:{facts_json}",
"ruleset-route": "从候选ruleset_id中按问题选择一个,不得编造,仅输出JSON:{\"ruleset_id\":\"\",\"reason\":\"\"}。\n候选:{ruleset_ids_json}\n问题:{question}",
"party-role": "分析指定公司在合同中的商业角色,不仅按甲乙方判断,仅输出JSON:{\"party_role\":\"demand_side | supplier_side | unclear\",\"reason\":\"\"}。\n公司:{company_name}\n合同:{contract_text}",
"llm": "你是通用LLM助手。",
}
class LLMTool:
    """Small client for an OpenAI-compatible ``/chat/completions`` endpoint.

    ``OPENAI_MODEL``, ``OPENAI_BASE_URL`` and ``OPENAI_API_KEY`` are read from
    the environment at construction time; the first two have defaults.
    """

    def __init__(self, system_prompt: str = ""):
        # Stored only; the request itself is built from explicit messages.
        self.system_prompt = system_prompt
        self.model = os.environ.get("OPENAI_MODEL", "gpt-4o-mini")
        self.base_url = (os.environ.get("OPENAI_BASE_URL") or "https://api.openai.com/v1").rstrip("/")
        self.api_key = os.environ.get("OPENAI_API_KEY")

    def build_messages(self, user: str, system: str | None = None) -> list[dict[str, str]]:
        """Return ``[system?, user]`` chat messages; the system entry is skipped when empty."""
        messages = [{"role": "system", "content": system}] if system else []
        messages.append({"role": "user", "content": user})
        return messages

    def run(self, messages: list[dict[str, str]]) -> str:
        """Send ``messages`` and return the first choice's content ("" if absent).

        Raises:
            RuntimeError: if ``OPENAI_API_KEY`` is not set.
        """
        if not self.api_key:
            raise RuntimeError("OPENAI_API_KEY is required")
        body = json.dumps({"model": self.model, "messages": messages}, ensure_ascii=False).encode()
        req = urllib.request.Request(
            f"{self.base_url}/chat/completions",
            data=body,
            headers={"Content-Type": "application/json", "Authorization": f"Bearer {self.api_key}"},
            method="POST",
        )
        with urllib.request.urlopen(req, timeout=120) as resp:
            data = json.loads(resp.read().decode())
        # Tolerate missing/empty "choices" or "message" instead of raising.
        return (((data.get("choices") or [{}])[0].get("message") or {}).get("content")) or ""

    def parse_first_json(self, text: str) -> Any:
        """Return the first JSON value found in ``text``, or ``None``.

        The whole text is tried first; failing that, each ``{`` / ``[`` is
        tried in order as the start of a document. Decoding stops where that
        document ends, so trailing prose or a second JSON value is ignored.
        """
        if not text:
            return None
        try:
            return json.loads(text)
        except ValueError:  # json.JSONDecodeError subclasses ValueError
            pass
        decoder = json.JSONDecoder()
        for opener in re.finditer(r"[\[{]", text):
            try:
                value, _end = decoder.raw_decode(text, opener.start())
            except ValueError:
                continue
            return value
        return None
def jdump(v: Any) -> str:
    """Serialize ``v`` as two-space-indented JSON, keeping non-ASCII text readable."""
    options = {"ensure_ascii": False, "indent": 2}
    return json.dumps(v, **options)
def rules_text(rules: list[dict[str, Any]]) -> str:
    """Render rules as labelled lines (title, rule, level, suggestion, case).

    Each rule contributes five ``label:value`` lines; missing keys render as
    empty values. ``None`` or an empty list yields an empty string.
    """
    labelled_keys = (
        ("标题", "title"),
        ("规则", "rule"),
        ("等级", "level"),
        ("建议", "suggestion_template"),
        ("案例", "case"),
    )
    rendered = []
    for entry in rules or []:
        rendered.append("\n".join(f"{label}:{entry.get(key, '')}" for label, key in labelled_keys))
    return "\n".join(rendered)
def default_rulesets() -> list[str]:
    """Return a fresh list of the built-in ruleset identifiers."""
    builtin = (
        "合同信息提取(合同组)",
        "合同信息提取(技术部)",
        "合同信息提取(采购部)",
        "技术协议提取(合同组)",
        "技术协议提取(技术部)",
    )
    return list(builtin)
def empty_fact(v: Any) -> bool:
    """Tell whether ``v`` carries no usable fact.

    Empty means: ``None``, a string that is blank or equals "未明确" after
    stripping, or an empty dict/list. Every other value counts as present.
    """
    if v is None:
        return True
    if isinstance(v, str):
        stripped = v.strip()
        return not stripped or stripped == "未明确"
    if isinstance(v, (dict, list)):
        return not v
    return False
def dedupe(values: list[Any]) -> list[Any]:
    """Drop empty facts and repeated values, keeping first-seen order.

    Dicts and lists are compared by their ``jdump`` text, everything else by
    ``str()``.
    """
    unique: list[Any] = []
    fingerprints: set[str] = set()
    for candidate in values:
        if isinstance(candidate, (dict, list)):
            fingerprint = jdump(candidate)
        else:
            fingerprint = str(candidate)
        if empty_fact(candidate) or fingerprint in fingerprints:
            continue
        fingerprints.add(fingerprint)
        unique.append(candidate)
    return unique
def merge_facts_rule(facts: list[dict[str, Any]], names: list[str]) -> dict[str, Any]:
    """Merge per-segment facts by name without calling an LLM.

    For every distinct, non-blank name the values found in ``facts`` are
    de-duplicated; a single survivor is stored as-is, several as a list, and
    names without any value are omitted. A ``_meta`` entry records the
    requested names and the number of source fact dicts.
    """
    wanted: dict[str, None] = {}
    for raw_name in names or []:
        label = str(raw_name).strip()
        if label:
            wanted.setdefault(label)

    result: dict[str, Any] = {}
    for label in wanted:
        collected = []
        for item in facts or []:
            if isinstance(item, dict) and label in item:
                collected.append(item.get(label))
        kept = dedupe(collected)
        if not kept:
            continue
        result[label] = kept if len(kept) != 1 else kept[0]

    result["_meta"] = {"summary_names": names, "source_fact_count": len(facts or [])}
    return result
def _render_prompt(template: str, **values: Any) -> str:
    """Fill ``{name}`` placeholders in ``template`` for the given names only.

    ``str.format`` cannot be used on the prompt templates: they embed literal
    JSON such as ``{"findings":[]}``, which ``format`` parses as a replacement
    field and rejects with ``KeyError``. Every brace group that is not one of
    the supplied names is left untouched, and substituted text is never
    re-scanned for placeholders.
    """
    if not values:
        return template
    names = "|".join(re.escape(name) for name in values)
    # A callable replacement keeps backslashes in the values literal.
    return re.sub(r"\{(" + names + r")\}", lambda hit: str(values[hit.group(1)]), template)


def run_review_llm(tool_name: str = "review", segment_id: int = 0, user_prompt: str | None = None, **kw) -> dict[str, Any]:
    """Run one review tool and return its parsed JSON result.

    Args:
        tool_name: One of the ``PROMPTS`` keys (case-insensitive); falsy
            values mean ``"review"``.
        segment_id: Accepted for interface compatibility; not used here.
        user_prompt: System prompt override for ``"llm"``; fallback question
            for ``"ruleset-route"``.
        **kw: Tool inputs (``segment_text``, ``rules``, ``facts``, ...).

    Returns:
        The first JSON value parsed from the model reply, ``{"raw": reply}``
        when nothing truthy could be parsed, ``{"error": ...}`` for an unknown
        tool, or the rule-based merge result for ``fact-merge`` when
        ``merge_mode`` is not ``"llm"`` (no model call in that case).
    """
    name = (tool_name or "review").lower()
    if name == "fact-merge" and str(kw.get("merge_mode") or "llm").lower() != "llm":
        return {"merge_facts": merge_facts_rule(kw.get("facts") or [], kw.get("summary_names") or [])}

    # Only the free-form "llm" tool sends user content; the templated tools
    # put everything into the system prompt.
    user = ""
    if name == "llm":
        prompt = user_prompt or PROMPTS["llm"]
        user = kw.get("user_content") or kw.get("segment_text") or ""
    elif name == "review":
        prompt = _render_prompt(
            PROMPTS[name],
            segment_text=kw.get("segment_text", ""),
            party_role=kw.get("party_role", ""),
            ruleset_text=rules_text(kw.get("rules") or []),
        )
    elif name == "summary":
        prompt = _render_prompt(
            PROMPTS[name],
            segment_text=kw.get("segment_text", ""),
            rule_fields=jdump([r.get("summary") for r in kw.get("rules") or [] if r.get("summary")]),
        )
    elif name == "router":
        prompt = _render_prompt(
            PROMPTS[name],
            segment_text=kw.get("segment_text", ""),
            context_memories_json=jdump(kw.get("context_facts") or []),
            party_role=kw.get("party_role", ""),
            candidate_rules_json=jdump([{r.get("title", ""): r.get("rule", "")} for r in kw.get("rules") or []]),
        )
    elif name == "merger":
        prompt = _render_prompt(PROMPTS[name], payload=jdump(kw.get("payload") or kw.get("findings") or []))
    elif name == "reflect":
        prompt = _render_prompt(
            PROMPTS[name],
            rule=jdump(kw.get("rule") or {}),
            findings_json=jdump(kw.get("findings") or []),
            facts_json=jdump(kw.get("facts") or kw.get("context_facts") or []),
            party_role=kw.get("party_role", ""),
        )
    elif name == "fact-merge":
        prompt = _render_prompt(
            PROMPTS[name],
            summary_names_json=jdump(kw.get("summary_names") or []),
            facts_json=jdump(kw.get("facts") or []),
        )
    elif name == "ruleset-route":
        prompt = _render_prompt(
            PROMPTS[name],
            question=kw.get("question") or user_prompt or "",
            ruleset_ids_json=jdump(kw.get("ruleset_ids") or default_rulesets()),
        )
    elif name == "party-role":
        prompt = _render_prompt(
            PROMPTS[name],
            company_name=kw.get("company_name") or "",
            contract_text=kw.get("contract_text") or kw.get("segment_text") or "",
        )
    else:
        return {"error": f"unknown tool: {tool_name}"}

    llm = LLMTool(prompt)
    raw = llm.run(llm.build_messages(user, prompt))
    return llm.parse_first_json(raw) or {"raw": raw}
def load_arg(v: str | None, default: Any) -> Any:
    """Decode a JSON command-line value.

    ``None`` yields ``default``. A value starting with ``@`` names a UTF-8
    file whose content is parsed; anything else is parsed as inline JSON.
    """
    if v is None:
        return default
    if v.startswith("@"):
        v = Path(v[1:]).read_text(encoding="utf-8")
    return json.loads(v)
def parser() -> argparse.ArgumentParser:
    """Build the argument parser for the review LLM CLI.

    The optional positional ``tool_name`` selects the tool (default
    ``review``); every tool input is an optional ``--flag``. JSON-valued
    flags stay strings at this stage.
    """
    tools = ["review", "summary", "router", "merger", "reflect", "fact-merge", "ruleset-route", "party-role", "llm"]
    cli = argparse.ArgumentParser(description="Standalone review LLM CLI")
    cli.add_argument("tool_name", nargs="?", default="review", choices=tools)
    # Only the segment id is converted; it is registered first to keep the
    # option order of the help output.
    cli.add_argument("--segment-id", default=0, type=int)
    string_options = {
        "segment-text": "",
        "segment-text-file": None,
        "party-role": "",
        "rules": "[]",
        "context-facts": "{}",
        "payload": None,
        "findings": "[]",
        "facts": "[]",
        "rule": "{}",
        "summary-names": "[]",
        "question": "",
        "ruleset-ids": None,
        "company-name": "",
        "contract-text": "",
        "contract-text-file": None,
        "user-prompt": None,
        "user-content": None,
    }
    for option, fallback in string_options.items():
        cli.add_argument(f"--{option}", default=fallback)
    cli.add_argument("--merge-mode", choices=["llm", "rule"], default="llm")
    cli.add_argument("--output-raw", action="store_true")
    return cli
def main(argv: list[str] | None = None) -> int:
    """Parse CLI arguments, run the selected review tool and print the result.

    Segment and contract text come from their ``*-file`` option when given,
    otherwise from the inline option. The result is printed as indented JSON,
    or via plain ``print`` with ``--output-raw``. Always returns 0.
    """
    args = parser().parse_args(argv)

    def text_from(file_option: str | None, inline: str) -> str:
        # A file option, when present, wins over the inline text.
        if file_option:
            return Path(file_option).read_text(encoding="utf-8")
        return inline

    segment = text_from(args.segment_text_file, args.segment_text)
    contract = text_from(args.contract_text_file, args.contract_text)
    inputs = {
        "segment_text": segment,
        "party_role": args.party_role,
        "rules": load_arg(args.rules, []),
        "context_facts": load_arg(args.context_facts, {}),
        "payload": load_arg(args.payload, []) if args.payload else None,
        "findings": load_arg(args.findings, []),
        "facts": load_arg(args.facts, []),
        "rule": load_arg(args.rule, {}),
        "summary_names": load_arg(args.summary_names, []),
        "merge_mode": args.merge_mode,
        "question": args.question,
        "ruleset_ids": load_arg(args.ruleset_ids, default_rulesets()) if args.ruleset_ids else default_rulesets(),
        "company_name": args.company_name,
        "contract_text": contract,
        "user_content": args.user_content,
    }
    result = run_review_llm(args.tool_name, args.segment_id, args.user_prompt, **inputs)
    if args.output_raw:
        print(result)
    else:
        print(jdump(result))
    return 0


if __name__ == "__main__":
    raise SystemExit(main())
#!/usr/bin/env python3
"""Standalone JSON-backed review memory CLI."""
from __future__ import annotations
import argparse, json, logging
from dataclasses import asdict, dataclass
from pathlib import Path
from threading import RLock
from typing import Any
from uuid import uuid4
# Configure root logging at WARNING level.
logging.basicConfig(level=logging.WARNING)
# Module logger used by this CLI (named "review_memory_cli").
logger = logging.getLogger("review_memory_cli")
@dataclass
class Finding:
    """One review finding tied to a rule and a contract segment."""

    rule_title: str
    segment_id: int
    original_text: str
    issue: str
    risk_level: str
    suggestion: str
    id: str = ""
    result: str = ""

    @classmethod
    def from_dict(cls, data: dict) -> "Finding":
        """Build a finding from a plain dict, coercing every field.

        Missing text fields become ``""``; a missing or falsy ``segment_id``
        becomes ``0``. ``None`` is treated as an empty dict.
        """
        source = data or {}
        text_fields = ("rule_title", "original_text", "issue", "risk_level", "suggestion", "id", "result")
        values: dict[str, Any] = {name: str(source.get(name, "")) for name in text_fields}
        values["segment_id"] = int(source.get("segment_id", 0) or 0)
        return cls(**values)

    def to_dict(self) -> dict[str, Any]:
        """Return the finding as a plain, JSON-serializable dict."""
        return asdict(self)
class MemoryStore:
    """JSON-file-backed store for review facts, merged facts and findings.

    The file lives at ``<script dir>/../tmp/<storage_name>``. Existing content
    is loaded on construction and the whole file is rewritten after every
    mutation. All accessors take a re-entrant lock.
    """

    def __init__(self, storage_name: str = "default.json") -> None:
        self._storage_path = Path(__file__).resolve().parent.parent / "tmp" / storage_name
        self._storage_path.parent.mkdir(parents=True, exist_ok=True)
        self._lock = RLock()
        self.facts: list[dict[str, Any]] = []
        self.merge_facts: list[dict[str, Any]] = []
        # Findings grouped by normalized key (see _key).
        self.findings: dict[str, list[Finding]] = {}
        self._load()

    def _key(self, key: str | None) -> str:
        """Normalize a findings key: trimmed, lower-cased, ``"review"`` if blank."""
        return (key or "").strip().lower() or "review"

    def add_fact(self, value: dict[str, Any]) -> list[dict[str, Any]]:
        """Append a fact, persist, and return the stored fact list."""
        with self._lock:
            self.facts.append(value)
            self._persist()
            return self.facts

    def add_merge_fact(self, value: dict[str, Any]) -> list[dict[str, Any]]:
        """Append a merged fact, persist, and return the stored list."""
        with self._lock:
            self.merge_facts.append(value)
            self._persist()
            return self.merge_facts

    def get_facts(self) -> list[dict[str, Any]]:
        """Return a shallow copy of the fact list."""
        with self._lock:
            return list(self.facts)

    def add_finding(self, key: str, finding: Finding) -> Finding:
        """Store ``finding`` under ``key``, assigning a UUID hex id if it has none."""
        with self._lock:
            if not finding.id:
                finding.id = uuid4().hex
            self.findings.setdefault(self._key(key), []).append(finding)
            self._persist()
            return finding

    def list_findings(self, key: str | None = None) -> dict[str, list[dict[str, Any]]]:
        """Return findings as dicts for one key, or for all keys when ``key`` is falsy."""
        with self._lock:
            keys = [self._key(key)] if key else list(self.findings)
            return {k: [f.to_dict() for f in self.findings.get(k, [])] for k in keys}

    def get_findings_by_segment(self, key: str, segment_id: int) -> list[dict[str, Any]]:
        """Return the findings under ``key`` that belong to ``segment_id``."""
        with self._lock:
            return [f.to_dict() for f in self.findings.get(self._key(key), []) if f.segment_id == segment_id]

    def search_findings(self, key: str, rule_title: str = "") -> list[dict[str, Any]]:
        """Return findings under ``key``; a non-blank title filters case-insensitively."""
        title = (rule_title or "").strip().lower()
        with self._lock:
            return [f.to_dict() for f in self.findings.get(self._key(key), []) if not title or f.rule_title.lower() == title]

    def delete_findings_by_segment(self, key: str, segment_id: int) -> int:
        """Remove the findings of one segment and return how many were removed."""
        with self._lock:
            k = self._key(key)
            current = list(self.findings.get(k, []))
            self.findings[k] = [f for f in current if f.segment_id != segment_id]
            removed = len(current) - len(self.findings[k])
            if removed:
                self._persist()
            return removed

    def search_facts(self, keywords: list[str]) -> list[Any]:
        """Return ``{name: value}`` for each fact entry whose name matches a keyword.

        Matching is case-insensitive substring containment in either
        direction (keyword in name, or name in keyword).
        """
        keys = [str(k).strip().lower() for k in keywords if str(k).strip()]
        matches = []
        with self._lock:
            for item in self.facts:
                for name, value in item.items():
                    low = str(name).lower()
                    if any(k in low or low in k for k in keys):
                        matches.append({name: value})
        return matches

    def clear(self) -> None:
        """Drop all facts, merged facts and findings, then persist the empty store."""
        with self._lock:
            self.facts.clear()
            self.merge_facts.clear()
            self.findings.clear()
            self._persist()

    def _payload(self) -> dict[str, Any]:
        """Return the JSON-serializable snapshot written to disk."""
        return {"facts": self.facts, "merge_facts": self.merge_facts, "findings": {k: [f.to_dict() for f in v] for k, v in self.findings.items()}}

    def _persist(self) -> None:
        """Rewrite the storage file with the current snapshot."""
        self._storage_path.write_text(json.dumps(self._payload(), ensure_ascii=False, indent=2), encoding="utf-8")

    def _load(self) -> None:
        """Load the storage file if it exists; failures are logged, not raised."""
        if not self._storage_path.exists():
            return
        try:
            data = json.loads(self._storage_path.read_text(encoding="utf-8") or "{}")
            self.facts = data.get("facts") or []
            self.merge_facts = data.get("merge_facts") or []
            self.findings = {self._key(k): [Finding.from_dict(i) for i in items or []] for k, items in (data.get("findings") or {}).items()}
        except Exception as exc:
            # Best effort: an unreadable file must not stop the CLI from starting.
            logger.error("Failed to load memory store: %s", exc)

    def export_to_json(self, path: str | None = None) -> str:
        """Write the current snapshot to ``path`` and return the path used.

        Without ``path`` the export goes next to the storage file as
        ``<stem>_export.json``. Only the file name is changed, so a ``.json``
        fragment elsewhere in the path is left alone and a storage name
        without the suffix no longer maps onto the storage file itself.
        """
        default_target = self._storage_path.with_name(f"{self._storage_path.stem}_export.json")
        target = path or str(default_target)
        with self._lock:
            snapshot = json.dumps(self._payload(), ensure_ascii=False, indent=2)
        Path(target).write_text(snapshot, encoding="utf-8")
        return target
def out(obj: Any) -> None:
    """Print ``obj`` to stdout as indented JSON with non-ASCII text kept as is."""
    rendered = json.dumps(obj, indent=2, ensure_ascii=False)
    print(rendered)
def load_json_arg(value: str) -> Any:
    """Parse a JSON argument given inline or as ``@path`` to a UTF-8 file."""
    if value.startswith("@"):
        text = Path(value[1:]).read_text(encoding="utf-8")
    else:
        text = value
    return json.loads(text)
def parser() -> argparse.ArgumentParser:
    """Build the argument parser for the review memory CLI.

    ``--storage`` names the backing file; the sub-command (stored in ``cmd``)
    is optional at parse time.
    """
    root = argparse.ArgumentParser(prog="review-memory-cli")
    root.add_argument("--storage", default="default.json")
    commands = root.add_subparsers(dest="cmd")

    commands.add_parser("list-facts")
    commands.add_parser("add-fact").add_argument("data")
    commands.add_parser("add-merge-fact").add_argument("data")
    commands.add_parser("search-facts").add_argument("keywords", nargs="+")

    add_finding = commands.add_parser("add-finding")
    add_finding.add_argument("--key", default="review")
    add_finding.add_argument("--rule", required=True)
    add_finding.add_argument("--segment", type=int, default=0)
    for text_flag in ("--original", "--issue", "--risk", "--suggest", "--result"):
        add_finding.add_argument(text_flag, default="")

    commands.add_parser("list-findings")
    commands.add_parser("list-findings-key").add_argument("key")

    by_segment = commands.add_parser("findings-by-seg")
    by_segment.add_argument("key")
    by_segment.add_argument("segment", type=int)

    search = commands.add_parser("search-findings")
    search.add_argument("key")
    search.add_argument("--rule-title", default="")

    delete = commands.add_parser("delete-findings-seg")
    delete.add_argument("key")
    delete.add_argument("segment", type=int)

    commands.add_parser("clear")
    commands.add_parser("export").add_argument("--out")
    return root
def main(argv: list[str] | None = None) -> int:
    """Run one review-memory command and return a process exit code.

    The store is opened before dispatch. Returns 0 after a recognized
    command; without one the help text is printed and 1 is returned.
    """
    args = parser().parse_args(argv)
    store = MemoryStore(args.storage)
    command = args.cmd

    if command == "list-facts":
        out(store.get_facts())
    elif command == "add-fact":
        store.add_fact(load_json_arg(args.data))
        print("OK")
    elif command == "add-merge-fact":
        store.add_merge_fact(load_json_arg(args.data))
        print("OK")
    elif command == "search-facts":
        out(store.search_facts(args.keywords))
    elif command == "add-finding":
        finding = Finding(args.rule, args.segment, args.original, args.issue, args.risk, args.suggest, result=args.result)
        out(store.add_finding(args.key, finding).to_dict())
    elif command == "list-findings":
        out(store.list_findings())
    elif command == "list-findings-key":
        out(store.list_findings(args.key))
    elif command == "findings-by-seg":
        out(store.get_findings_by_segment(args.key, args.segment))
    elif command == "search-findings":
        out(store.search_findings(args.key, args.rule_title))
    elif command == "delete-findings-seg":
        print(store.delete_findings_by_segment(args.key, args.segment))
    elif command == "clear":
        store.clear()
        print("cleared")
    elif command == "export":
        print(store.export_to_json(args.out))
    else:
        parser().print_help()
        return 1
    return 0


if __name__ == "__main__":
    raise SystemExit(main())
import argparse
import json
import re
import sys
from pathlib import Path
from urllib.parse import unquote, urlparse
import requests
from loguru import logger
# Built-in defaults, also used as the default values of the CLI options.
# NOTE(review): the admin credentials are hard-coded here and are interpolated
# into the --help text of the upload command; consider sourcing them from the
# environment instead.
DEFAULT_OUTER_BACKEND_URL = "https://172.21.107.45:48080"
DEFAULT_BASE_FASTGPT_URL = "http://172.21.107.45:3030"
DEFAULT_BASE_BACKEND_URL = "http://172.21.107.45:1122"
DEFAULT_BACKEND_ADMIN_USERNAME = "admin"
DEFAULT_BACKEND_ADMIN_PASSWORD = "admin@jpai.com"
# Active settings read by the upload/download helpers; overwritten through
# configure_urls() and configure_login().
base_fastgpt_url = DEFAULT_BASE_FASTGPT_URL
base_backend_url = DEFAULT_BASE_BACKEND_URL
outer_backend_url = DEFAULT_OUTER_BACKEND_URL
backend_admin_username = DEFAULT_BACKEND_ADMIN_USERNAME
backend_admin_password = DEFAULT_BACKEND_ADMIN_PASSWORD
def configure_urls(
    fastgpt_url: str | None = None,
    backend_url: str | None = None,
    outer_url: str | None = None,
):
    """Override the module-level base URLs; ``None`` leaves a setting untouched."""
    requested = {
        "base_fastgpt_url": fastgpt_url,
        "base_backend_url": backend_url,
        "outer_backend_url": outer_url,
    }
    globals().update({name: value for name, value in requested.items() if value is not None})
def configure_login(username: str | None = None, password: str | None = None):
    """Override the module-level backend credentials; ``None`` keeps the current value."""
    requested = {
        "backend_admin_username": username,
        "backend_admin_password": password,
    }
    globals().update({name: value for name, value in requested.items() if value is not None})
def _strip_trailing_slash(url: str | None) -> str | None:
    """Return ``url`` without trailing ``/`` characters; ``None`` passes through."""
    return url.rstrip("/") if url is not None else None
def upload_file(path, input_url_to_inner=True, output_url_to_inner=False) -> str:
    """Upload a local file through the backend and return the reported value.

    The function logs in with the configured admin credentials, then posts
    the file as multipart form data using the returned access token.

    Args:
        path: Local file to upload.
        input_url_to_inner: Accepted for interface compatibility; not used.
        output_url_to_inner: Accepted for interface compatibility; not used.

    Returns:
        The ``data`` field of the upload response.

    Raises:
        requests.HTTPError: if login or upload answers with an error status.
        requests.Timeout: if the backend does not respond in time.
        Exception: if the upload response carries no ``data``.
    """
    from requests_toolbelt import MultipartEncoder

    # Without a timeout, requests waits forever on an unresponsive backend.
    request_timeout = 120

    login_data = {
        "username": backend_admin_username,
        "password": backend_admin_password,
    }
    login_url = f"{base_backend_url}/admin-api/system/auth/login"
    response = requests.post(
        url=login_url,
        headers={"Content-Type": "application/json"},
        data=json.dumps(login_data),
        timeout=request_timeout,
    )
    response.raise_for_status()
    try:
        token = json.loads(response.text).get("data").get("accessToken")
    except Exception as e:
        # Log the malformed login reply, then let the caller see the error.
        logger.error(f"后端登录异常:{e}")
        raise

    upload_url = f"{base_backend_url}/admin-api/infra/file/upload"
    with open(path, "rb") as file_obj:
        encoder = MultipartEncoder(fields={"file": (Path(path).name, file_obj)})
        response = requests.post(
            url=upload_url,
            headers={"Content-Type": encoder.content_type, "Authorization": token},
            data=encoder,
            timeout=request_timeout,
        )
    response.raise_for_status()
    res = json.loads(response.text).get("data")
    if res:
        return res
    raise Exception(f"上传{path}失败 Response text: {response.text}")
def _download_basename(filename: str) -> str:
    """Reduce a raw file name to a safe final path component.

    Surrounding whitespace and double quotes are removed, percent-escapes are
    decoded and backslashes are treated as path separators. An empty result
    falls back to ``"downloaded_file"``.
    """
    cleaned = filename.strip().strip('"')
    decoded = unquote(cleaned).replace("\\", "/")
    leaf = Path(decoded).name
    return leaf if leaf else "downloaded_file"
def _resolve_download_filename(url: str, response: requests.Response) -> str:
    """Choose the file name for a downloaded response.

    ``filename*=`` in ``Content-Disposition`` is preferred, then plain
    ``filename=``; otherwise the last component of the URL path is used.
    ``_download_basename`` supplies ``"downloaded_file"`` when a candidate
    reduces to nothing.
    """
    disposition = response.headers.get("content-disposition", "")
    patterns = (
        r"filename\*=(?:UTF-8''|utf-8'')?([^;]+)",
        r'filename="?([^";]+)"?',
    )
    for pattern in patterns:
        found = re.search(pattern, disposition) if disposition else None
        if found:
            return _download_basename(found.group(1))
    # _download_basename never returns an empty string, so no further
    # fallback is needed here.
    return _download_basename(urlparse(url).path)
def download_file(url, path, input_url_to_inner=True):
    """Download ``url`` and save the body at ``path``.

    A URL without an ``http:``/``https:`` prefix is appended to
    ``base_fastgpt_url``, and every occurrence of ``outer_backend_url`` is
    replaced by ``base_backend_url`` before the request.

    Args:
        url: Absolute URL or a path relative to the FastGPT base URL.
        path: Target file; if it is an existing directory, the file name is
            resolved from the response headers / URL.
        input_url_to_inner: Accepted for interface compatibility; the URL
            rewriting above is always applied.

    Returns:
        The saved path as a string, or ``None`` when the server does not
        answer with status 200.
    """
    if not url.startswith(("http:", "https:")):
        url = base_fastgpt_url + url
    url = url.replace(outer_backend_url, base_backend_url)
    logger.info(f"url准备下载:{url}")
    # Without a timeout, requests waits forever on an unresponsive server.
    response = requests.get(url, timeout=120)
    if response.status_code == 200:
        target_path = Path(path)
        if target_path.exists() and target_path.is_dir():
            target_path = target_path / _resolve_download_filename(url, response)
        target_path.parent.mkdir(parents=True, exist_ok=True)
        with open(target_path, "wb") as f:
            f.write(response.content)
        logger.info(f"{url}文件下载成功,保存到{target_path}")
        return str(target_path)
    logger.error(f"{url}文件下载失败. HTTP Status Code: {response.status_code}")
    return None
def url_replace_fastgpt(origin: str):
    """Return ``origin`` as an absolute URL.

    Values that already start with ``http:`` or ``https:`` are returned
    unchanged; anything else is treated as a FastGPT-relative path and
    appended to ``base_fastgpt_url``. The ``https:`` check matters: testing
    only for ``http:`` would prepend the base URL to an absolute HTTPS URL.
    """
    if origin.startswith(("http:", "https:")):
        return origin
    return base_fastgpt_url + origin
def _add_common_url_args(parser: argparse.ArgumentParser):
    """Register the base-URL options shared by every sub-command.

    Each option's help text ends with its default value.
    """
    shared_options = (
        ("--base-fastgpt-url", DEFAULT_BASE_FASTGPT_URL, "FastGPT 内网基础地址"),
        ("--base-backend-url", DEFAULT_BASE_BACKEND_URL, "后端内网基础地址"),
        ("--outer-backend-url", DEFAULT_OUTER_BACKEND_URL, "后端外网地址,下载时会替换为内网地址"),
    )
    for flag, fallback, description in shared_options:
        parser.add_argument(flag, default=fallback, help=f"{description},默认:{fallback}")
def build_arg_parser() -> argparse.ArgumentParser:
    """Build the CLI parser with ``upload``, ``download`` and ``normalize-url``.

    A sub-command is required. Each one accepts the shared base-URL options;
    ``upload`` additionally takes backend credentials.
    """
    root = argparse.ArgumentParser(
        description="单文件上传/下载工具:通过后端接口上传文件,或下载 FastGPT/后端文件 URL。"
    )
    root.set_defaults(command=None)
    commands = root.add_subparsers(dest="command", required=True)

    upload = commands.add_parser("upload", help="上传本地文件。")
    _add_common_url_args(upload)
    credentials = (
        ("--username", DEFAULT_BACKEND_ADMIN_USERNAME, "后端管理员用户名"),
        ("--password", DEFAULT_BACKEND_ADMIN_PASSWORD, "后端管理员密码"),
    )
    for flag, fallback, description in credentials:
        upload.add_argument(flag, default=fallback, help=f"{description},默认:{fallback}")
    upload.add_argument("path", help="要上传的本地文件路径。")

    download = commands.add_parser("download", help="下载 URL 到本地路径。")
    _add_common_url_args(download)
    download.add_argument("url", help="HTTP URL 或 FastGPT/后端相对路径。")
    download.add_argument("path", help="输出文件路径;如果是已存在目录,则自动解析文件名。")

    normalize = commands.add_parser("normalize-url", help="把 FastGPT 相对路径补全为绝对 URL。")
    _add_common_url_args(normalize)
    normalize.add_argument("url", help="HTTP URL 或 FastGPT 相对路径。")

    return root
def main(argv: list[str] | None = None) -> int:
    """Run the upload/download CLI and return a process exit code.

    The base URLs from the command line are applied (without trailing
    slashes) before the selected command runs. Returns 0 on success and 1
    when a download yields no saved file.
    """
    cli = build_arg_parser()
    options = cli.parse_args(argv)
    configure_urls(
        fastgpt_url=_strip_trailing_slash(options.base_fastgpt_url),
        backend_url=_strip_trailing_slash(options.base_backend_url),
        outer_url=_strip_trailing_slash(options.outer_backend_url),
    )

    command = options.command
    if command == "upload":
        configure_login(username=options.username, password=options.password)
        print(upload_file(options.path))
        return 0

    if command == "download":
        saved_path = download_file(options.url, options.path)
        if saved_path is None:
            return 1
        print(saved_path)
        return 0

    if command == "normalize-url":
        print(url_replace_fastgpt(options.url))
        return 0

    # argparse's error() exits the process; the return is only a fallback.
    cli.error(f"unsupported command: {command}")
    return 2


if __name__ == "__main__":
    sys.exit(main())
......@@ -26,6 +26,7 @@ class OpenAITool:
msg = msg[1:]
# deepseek专用关闭思考
extra_body["thinking"] = {"type": "disabled"}
extra_body["chat_template_kwargs"] = {"enable_thinking": False}
try:
response = await self.client.chat.completions.create(
model=self.llm_config.model, messages=msg, extra_body=extra_body
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment