Commit 3ac7da45 by ccran

feat: add skills;

parent 3f7f704b
......@@ -18,10 +18,15 @@ class LLMConfig:
api_key: str = "none"
model: str = "Qwen2-72B-Instruct"
# 最大分片数量
min_single_chunk_size = 2000
max_single_chunk_size = 100000
max_chunk_page = 10
MAX_SINGLE_CHUNK_SIZE=100000
MERGE_RULE_PROMPT = False
MAX_SINGLE_CHUNK_SIZE = 100000
# MAX_SINGLE_CHUNK_SIZE = 5000
# MAX_SINGLE_CHUNK_SIZE = 2000
MERGE_RULE_PROMPT = False
META_KEY = "META"
DEFAULT_RULESET_ID = "通用"
## 规则集ID列表,需与rules.xlsx中的sheet名称保持一致!!!
......@@ -110,7 +115,3 @@ LLM = {
}
doc_support_formats = [".docx", ".doc", ".wps"]
pdf_support_formats = [".txt", ".md", ".pdf"]
# 最大分片数量
min_single_chunk_size = 2000
max_single_chunk_size = 20000
max_chunk_page = 10
......@@ -19,7 +19,7 @@ batch_size = 5
if not use_lufa:
SUFFIX = "_麓发迁移"
batch_input_dir_path = "jp-input"
batch_output_dir_path = f"/data/home/htsc/jp-contract/data/benchmark/results/jp-output-lufa-{time.strftime('%Y%m%d-%H%M%S', time.localtime())}"
batch_output_dir_path = f"/data/home/htsc/jp-contract/data/benchmark/results/jp-output-lufa-chunk100000"
# 金盘fastgpt接口
url = "http://172.21.107.45:3002/api/v1/chat/completions"
# 金盘迁移麓发合同审查测试token
......
import argparse
import json
import re
import sys
from pathlib import Path
from urllib.parse import unquote, urlparse
import requests
from loguru import logger
# Built-in endpoint defaults; each one can be overridden from the command line.
DEFAULT_BASE_FASTGPT_URL = "http://172.21.107.45:3030"
DEFAULT_BASE_BACKEND_URL = "http://172.21.107.45:1122"
DEFAULT_OUTER_BACKEND_URL = "https://172.21.107.45:48080"

# Built-in backend login used by upload_file().
# NOTE(review): credentials are hard-coded in source; consider supplying
# them from outside the repository.
DEFAULT_BACKEND_ADMIN_USERNAME = "admin"
DEFAULT_BACKEND_ADMIN_PASSWORD = "admin@jpai.com"

# Active settings read by the helpers in this module. They start at the
# defaults and are replaced by configure_urls() / configure_login().
base_fastgpt_url = DEFAULT_BASE_FASTGPT_URL
base_backend_url = DEFAULT_BASE_BACKEND_URL
outer_backend_url = DEFAULT_OUTER_BACKEND_URL
backend_admin_username = DEFAULT_BACKEND_ADMIN_USERNAME
backend_admin_password = DEFAULT_BACKEND_ADMIN_PASSWORD
def configure_urls(
fastgpt_url: str | None = None,
backend_url: str | None = None,
outer_url: str | None = None,
):
global base_fastgpt_url, base_backend_url, outer_backend_url
if fastgpt_url is not None:
base_fastgpt_url = fastgpt_url
if backend_url is not None:
base_backend_url = backend_url
if outer_url is not None:
outer_backend_url = outer_url
def configure_login(username: str | None = None, password: str | None = None):
global backend_admin_username, backend_admin_password
if username is not None:
backend_admin_username = username
if password is not None:
backend_admin_password = password
def _strip_trailing_slash(url: str | None) -> str | None:
if url is None:
return None
return url.rstrip("/")
def upload_file(path, input_url_to_inner=True, output_url_to_inner=False) -> str:
    """Log in to the backend and upload one local file.

    Args:
        path: Local path of the file to upload.
        input_url_to_inner: Unused; kept so existing callers keep working.
        output_url_to_inner: Unused; kept so existing callers keep working.

    Returns:
        The ``data`` field of the upload response.
        NOTE(review): presumably the stored file's URL; confirm against the backend.

    Raises:
        requests.RequestException: On a connection error, a timeout, or a
            non-2xx status from the login or upload request.
        RuntimeError: If the login reply carries no access token, or the
            upload reply carries no ``data``.
    """
    from requests_toolbelt import MultipartEncoder

    # (connect, read) timeouts in seconds, so a dead backend cannot hang the
    # caller forever. The read timeout is per socket read, not a total limit.
    timeout = (10, 300)

    login_data = {
        "username": backend_admin_username,
        "password": backend_admin_password,
    }
    login_url = f"{base_backend_url}/admin-api/system/auth/login"
    response = requests.post(
        url=login_url,
        headers={"Content-Type": "application/json"},
        data=json.dumps(login_data),
        timeout=timeout,
    )
    response.raise_for_status()
    try:
        token = json.loads(response.text).get("data").get("accessToken")
    except Exception as e:
        # Include the server reply: a rejected login may come back as HTTP 200
        # with an empty "data", and the bare exception text hides the reason.
        logger.error(f"后端登录异常:{e} Response text: {response.text}")
        raise
    if not token:
        # requests drops header values that are None, so continuing here would
        # send an unauthenticated upload and fail with a misleading error.
        logger.error("后端登录异常:响应中缺少accessToken")
        raise RuntimeError("后端登录失败:响应中缺少accessToken")

    upload_url = f"{base_backend_url}/admin-api/infra/file/upload"
    with open(path, "rb") as file_obj:
        encoder = MultipartEncoder(fields={"file": (Path(path).name, file_obj)})
        response = requests.post(
            url=upload_url,
            headers={"Content-Type": encoder.content_type, "Authorization": token},
            data=encoder,
            timeout=timeout,
        )
    response.raise_for_status()
    res = json.loads(response.text).get("data")
    if res:
        return res
    raise RuntimeError(f"上传{path}失败 Response text: {response.text}")
def _download_basename(filename: str) -> str:
filename = unquote(filename.strip().strip('"'))
filename = filename.replace("\\", "/")
return Path(filename).name or "downloaded_file"
def _resolve_download_filename(url: str, response: requests.Response) -> str:
    """Pick a local file name for a downloaded response.

    Order of preference:
      1. the extended ``filename*=`` parameter of ``Content-Disposition``;
      2. the plain ``filename=`` parameter of ``Content-Disposition``;
      3. the last path component of *url*.

    Args:
        url: The URL that was requested.
        response: The HTTP response; only its ``headers`` are read.

    Returns:
        A bare file name. ``_download_basename`` supplies
        ``"downloaded_file"`` when the chosen source yields nothing.
    """
    content_disposition = response.headers.get("content-disposition", "")
    if content_disposition:
        # Parameter names are case-insensitive, hence IGNORECASE.
        match = re.search(r"filename\*=([^;]+)", content_disposition, re.IGNORECASE)
        if match:
            value = match.group(1).strip().strip('"')
            # The extended form is charset'language'percent-encoded-value.
            # Drop the whole prefix, including a language tag, instead of
            # only a literal UTF-8'' so it never leaks into the file name.
            # The helper decodes escapes as UTF-8; other charsets may not
            # decode cleanly.
            value = re.sub(
                r"^[A-Za-z0-9._:+-]+'[A-Za-z0-9-]*'", "", value, count=1
            )
            return _download_basename(value)
        match = re.search(
            r'filename="?([^";]+)"?', content_disposition, re.IGNORECASE
        )
        if match:
            return _download_basename(match.group(1))
    # The helper never returns an empty string, so no extra fallback is needed.
    return _download_basename(urlparse(url).path)
def download_file(url, path, input_url_to_inner=True):
    """Download *url* and save it to *path*.

    A URL that does not start with ``http:`` or ``https:`` is prefixed with
    ``base_fastgpt_url``. Any occurrence of ``outer_backend_url`` is then
    replaced with ``base_backend_url`` before the request is made.

    Args:
        url: Absolute HTTP(S) URL or a relative path.
        path: Output file path. If it is an existing directory, the file
            name is resolved from the response and the URL.
        input_url_to_inner: Unused; kept so existing callers keep working.

    Returns:
        The saved file path as a string, or ``None`` when the server
        answers with a status other than 200.

    Raises:
        requests.RequestException: On a connection error or timeout.
    """
    if not url.startswith(("http:", "https:")):
        url = base_fastgpt_url + url
    url = url.replace(outer_backend_url, base_backend_url)
    logger.info(f"url准备下载:{url}")
    # (connect, read) timeouts in seconds, so an unresponsive server cannot
    # hang the caller forever. The read timeout is per socket read.
    response = requests.get(url, timeout=(10, 300))
    if response.status_code != 200:
        logger.error(f"{url}文件下载失败. HTTP Status Code: {response.status_code}")
        return None

    target_path = Path(path)
    # is_dir() is already False for a path that does not exist.
    if target_path.is_dir():
        target_path = target_path / _resolve_download_filename(url, response)
    target_path.parent.mkdir(parents=True, exist_ok=True)
    with open(target_path, "wb") as f:
        f.write(response.content)
    logger.info(f"{url}文件下载成功,保存到{target_path}")
    return str(target_path)
def url_replace_fastgpt(origin: str):
    """Turn a FastGPT-relative path into an absolute URL.

    Args:
        origin: An absolute HTTP(S) URL or a relative path.

    Returns:
        *origin* unchanged when it already starts with ``http:`` or
        ``https:``; otherwise ``base_fastgpt_url`` followed by *origin*.
    """
    # Check both schemes, as download_file does. Checking only "http:" would
    # prepend the FastGPT base to an already-absolute https URL.
    if not origin.startswith(("http:", "https:")):
        origin = base_fastgpt_url + origin
    return origin
def _add_common_url_args(parser: argparse.ArgumentParser):
    """Register the three endpoint options shared by every sub-command.

    Adds ``--base-fastgpt-url``, ``--base-backend-url`` and
    ``--outer-backend-url`` to *parser*, each defaulting to the matching
    ``DEFAULT_*`` constant.
    """
    url_options = (
        (
            "--base-fastgpt-url",
            DEFAULT_BASE_FASTGPT_URL,
            f"FastGPT 内网基础地址,默认:{DEFAULT_BASE_FASTGPT_URL}",
        ),
        (
            "--base-backend-url",
            DEFAULT_BASE_BACKEND_URL,
            f"后端内网基础地址,默认:{DEFAULT_BASE_BACKEND_URL}",
        ),
        (
            "--outer-backend-url",
            DEFAULT_OUTER_BACKEND_URL,
            f"后端外网地址,下载时会替换为内网地址,默认:{DEFAULT_OUTER_BACKEND_URL}",
        ),
    )
    for flag, default_value, help_text in url_options:
        parser.add_argument(flag, default=default_value, help=help_text)
def build_arg_parser() -> argparse.ArgumentParser:
    """Build the command-line parser.

    Three sub-commands are defined; one must be given:
      * ``upload PATH`` accepts ``--username`` / ``--password``;
      * ``download URL PATH``;
      * ``normalize-url URL``.
    Every sub-command also accepts the shared endpoint options added by
    ``_add_common_url_args``.
    """
    root = argparse.ArgumentParser(
        description="单文件上传/下载工具:通过后端接口上传文件,或下载 FastGPT/后端文件 URL。"
    )
    root.set_defaults(command=None)
    commands = root.add_subparsers(dest="command", required=True)

    # upload: endpoint options, login options, then the file to send.
    upload_cmd = commands.add_parser("upload", help="上传本地文件。")
    _add_common_url_args(upload_cmd)
    login_options = (
        (
            "--username",
            DEFAULT_BACKEND_ADMIN_USERNAME,
            f"后端管理员用户名,默认:{DEFAULT_BACKEND_ADMIN_USERNAME}",
        ),
        (
            "--password",
            DEFAULT_BACKEND_ADMIN_PASSWORD,
            f"后端管理员密码,默认:{DEFAULT_BACKEND_ADMIN_PASSWORD}",
        ),
    )
    for flag, default_value, help_text in login_options:
        upload_cmd.add_argument(flag, default=default_value, help=help_text)
    upload_cmd.add_argument("path", help="要上传的本地文件路径。")

    # download: endpoint options, source URL, destination path.
    download_cmd = commands.add_parser("download", help="下载 URL 到本地路径。")
    _add_common_url_args(download_cmd)
    download_cmd.add_argument("url", help="HTTP URL 或 FastGPT/后端相对路径。")
    download_cmd.add_argument(
        "path", help="输出文件路径;如果是已存在目录,则自动解析文件名。"
    )

    # normalize-url: endpoint options and the URL to complete.
    normalize_cmd = commands.add_parser(
        "normalize-url", help="把 FastGPT 相对路径补全为绝对 URL。"
    )
    _add_common_url_args(normalize_cmd)
    normalize_cmd.add_argument("url", help="HTTP URL 或 FastGPT 相对路径。")

    return root
def main(argv: list[str] | None = None) -> int:
    """Run the command-line tool and return its exit code.

    Args:
        argv: Argument list to parse; ``None`` lets argparse read ``sys.argv``.

    Returns:
        ``0`` on success and ``1`` when a download yields no saved file.
        ``2`` follows the ``parser.error`` call for an unrecognised command.
    """
    cli = build_arg_parser()
    options = cli.parse_args(argv)

    # Apply the endpoint overrides first; every sub-command depends on them.
    configure_urls(
        fastgpt_url=_strip_trailing_slash(options.base_fastgpt_url),
        backend_url=_strip_trailing_slash(options.base_backend_url),
        outer_url=_strip_trailing_slash(options.outer_backend_url),
    )

    command = options.command
    if command == "upload":
        configure_login(username=options.username, password=options.password)
        print(upload_file(options.path))
        exit_code = 0
    elif command == "download":
        saved_path = download_file(options.url, options.path)
        if saved_path is None:
            exit_code = 1
        else:
            print(saved_path)
            exit_code = 0
    elif command == "normalize-url":
        print(url_replace_fastgpt(options.url))
        exit_code = 0
    else:
        cli.error(f"unsupported command: {options.command}")
        exit_code = 2
    return exit_code
# Script entry point: hand main()'s return value to the interpreter as the
# process exit status.
if __name__ == "__main__":
    raise SystemExit(main())
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment