From 3a297dda3fb0cb741cdbfba7b06d5b03e5f0495e Mon Sep 17 00:00:00 2001 From: lintsinghua Date: Mon, 8 Dec 2025 21:45:58 +0800 Subject: [PATCH] feat: add json-repair library as fallback for LLM response parsing - Add json-repair>=0.30.0 to dependencies - Add _repair_json_with_library method as final fallback in JSON parsing - Update version to 2.0.0-beta.6 --- README.md | 2 +- backend/app/services/llm/service.py | 35 ++++++ backend/pyproject.toml | 1 + backend/requirements-lock.txt | 172 ++++++++++++++++++++++++++-- backend/requirements.txt | 1 + backend/uv.lock | 105 +++++++++-------- frontend/package.json | 2 +- 7 files changed, 261 insertions(+), 57 deletions(-) diff --git a/README.md b/README.md index 21c7ae7..47267e3 100644 --- a/README.md +++ b/README.md @@ -8,7 +8,7 @@
-[![Version](https://img.shields.io/badge/version-2.0.0--beta.5-blue.svg)](https://github.com/lintsinghua/DeepAudit/releases) +[![Version](https://img.shields.io/badge/version-2.0.0--beta.6-blue.svg)](https://github.com/lintsinghua/DeepAudit/releases) [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT) [![React](https://img.shields.io/badge/React-18-61dafb.svg)](https://reactjs.org/) [![TypeScript](https://img.shields.io/badge/TypeScript-5.7-3178c6.svg)](https://www.typescriptlang.org/) diff --git a/backend/app/services/llm/service.py b/backend/app/services/llm/service.py index a99347d..ff05d56 100644 --- a/backend/app/services/llm/service.py +++ b/backend/app/services/llm/service.py @@ -11,6 +11,13 @@ from .types import LLMConfig, LLMProvider, LLMMessage, LLMRequest, DEFAULT_MODEL from .factory import LLMFactory from app.core.config import settings +# json-repair 库用于修复损坏的 JSON +try: + from json_repair import repair_json + JSON_REPAIR_AVAILABLE = True +except ImportError: + JSON_REPAIR_AVAILABLE = False + logger = logging.getLogger(__name__) @@ -458,6 +465,8 @@ Please analyze the following code: lambda: self._fix_truncated_json(clean_text(text)), # 6. 激进修复后解析 lambda: json.loads(aggressive_fix_json(text)), + # 7. 使用 json-repair 库作为最终兜底方案 + lambda: self._repair_json_with_library(text), ] last_error = None @@ -574,6 +583,32 @@ Please analyze the following code: json_str = re.sub(r',(\s*[}\]])', r'\1', json_str) return json.loads(json_str) + def _repair_json_with_library(self, text: str) -> Dict[str, Any]: + """使用 json-repair 库修复损坏的 JSON(兜底方案)""" + if not JSON_REPAIR_AVAILABLE: + raise ValueError("json-repair library not available") + + # 先尝试提取 JSON 部分 + start_idx = text.find('{') + if start_idx == -1: + raise ValueError("No JSON object found for repair") + + # 尝试找到最后一个 } + end_idx = text.rfind('}') + if end_idx > start_idx: + json_str = text[start_idx:end_idx + 1] + else: + json_str = text[start_idx:] + + # 使用 json-repair 修复 + repaired = repair_json(json_str, return_objects=True) + + if isinstance(repaired, dict): + logger.info("✅ json-repair 库成功修复 JSON") + return repaired + + raise ValueError(f"json-repair returned unexpected type: {type(repaired)}") + def _get_default_response(self) -> Dict[str, Any]: """返回默认响应""" return { diff --git a/backend/pyproject.toml b/backend/pyproject.toml index 71df2a3..e967e4b 100644 --- a/backend/pyproject.toml +++ b/backend/pyproject.toml @@ -22,4 +22,5 @@ dependencies = [ "reportlab>=4.0.0", "weasyprint>=66.0", "jinja2>=3.1.6", + "json-repair>=0.30.0", ] diff --git a/backend/requirements-lock.txt b/backend/requirements-lock.txt index 4f23689..cb03ddb 100644 --- a/backend/requirements-lock.txt +++ b/backend/requirements-lock.txt @@ -1,5 +1,11 @@ # This file was autogenerated by uv via the following command: # uv pip compile requirements.txt -o requirements-lock.txt +aiohappyeyeballs==2.6.1 + # via aiohttp +aiohttp==3.13.2 + # via litellm +aiosignal==1.4.0 + # via aiohttp alembic==1.17.2 # via -r requirements.txt annotated-doc==0.0.4 @@ -9,46 +15,135 @@ annotated-types==0.7.0 anyio==4.11.0 # via # httpx + # openai # starlette # watchfiles asyncpg==0.31.0 # via -r requirements.txt -bcrypt==5.0.0 - # via passlib +attrs==25.4.0 + # via + # aiohttp + # jsonschema + # referencing +bcrypt==4.3.0 + # via + # -r requirements.txt + # passlib +brotli==1.2.0 + # via fonttools certifi==2025.11.12 # via # httpcore # httpx + # requests cffi==2.0.0 - # via cryptography + # via + # cryptography + # weasyprint +charset-normalizer==3.4.4 + # via + # reportlab + # requests click==8.3.1 - # via uvicorn + # via + # litellm + # typer-slim + # uvicorn cryptography==46.0.3 # via python-jose +cssselect2==0.8.0 + # via weasyprint +distro==1.9.0 + # via openai +dnspython==2.8.0 + # via email-validator ecdsa==0.19.1 # via python-jose +email-validator==2.3.0 + # via -r requirements.txt fastapi==0.122.0 # via -r requirements.txt +fastuuid==0.14.0 + # via litellm +filelock==3.20.0 + # via huggingface-hub +fonttools==4.61.0 + # via weasyprint +frozenlist==1.8.0 + # via + # aiohttp + # aiosignal +fsspec==2025.12.0 + # via huggingface-hub +greenlet==3.3.0 + # via -r requirements.txt +grpcio==1.67.1 + # via litellm h11==0.16.0 # via # httpcore # uvicorn +hf-xet==1.2.0 + # via huggingface-hub httpcore==1.0.9 # via httpx httptools==0.7.1 # via uvicorn httpx==0.28.1 - # via -r requirements.txt + # via + # -r requirements.txt + # huggingface-hub + # litellm + # openai +huggingface-hub==1.2.1 + # via tokenizers idna==3.11 # via # anyio + # email-validator # httpx + # requests + # yarl +importlib-metadata==8.7.0 + # via litellm +jinja2==3.1.6 + # via + # -r requirements.txt + # litellm +jiter==0.12.0 + # via openai +json-repair==0.54.2 + # via -r requirements.txt +jsonschema==4.25.1 + # via litellm +jsonschema-specifications==2025.9.1 + # via jsonschema +litellm==1.80.8 + # via -r requirements.txt mako==1.3.10 # via alembic markupsafe==3.0.3 - # via mako + # via + # jinja2 + # mako +multidict==6.7.0 + # via + # aiohttp + # yarl +openai==2.9.0 + # via litellm +packaging==25.0 + # via huggingface-hub passlib==1.7.4 # via -r requirements.txt +pillow==12.0.0 + # via + # reportlab + # weasyprint +propcache==0.4.1 + # via + # aiohttp + # yarl pyasn1==0.6.1 # via # python-jose @@ -59,13 +154,20 @@ pydantic==2.12.4 # via # -r requirements.txt # fastapi + # litellm + # openai # pydantic-settings pydantic-core==2.41.5 # via pydantic pydantic-settings==2.12.0 # via -r requirements.txt +pydyf==0.12.1 + # via weasyprint +pyphen==0.17.2 + # via weasyprint python-dotenv==1.2.1 # via + # litellm # pydantic-settings # uvicorn python-jose==3.5.0 @@ -73,36 +175,90 @@ python-jose==3.5.0 python-multipart==0.0.20 # via -r requirements.txt pyyaml==6.0.3 - # via uvicorn + # via + # huggingface-hub + # uvicorn +referencing==0.37.0 + # via + # jsonschema + # jsonschema-specifications +regex==2025.11.3 + # via tiktoken +reportlab==4.4.5 + # via -r requirements.txt +requests==2.32.5 + # via tiktoken +rpds-py==0.30.0 + # via + # jsonschema + # referencing rsa==4.9.1 # via python-jose +shellingham==1.5.4 + # via huggingface-hub six==1.17.0 # via ecdsa sniffio==1.3.1 - # via anyio + # via + # anyio + # openai sqlalchemy==2.0.44 # via # -r requirements.txt # alembic starlette==0.50.0 # via fastapi +tiktoken==0.12.0 + # via litellm +tinycss2==1.5.1 + # via + # cssselect2 + # weasyprint +tinyhtml5==2.0.0 + # via weasyprint +tokenizers==0.22.1 + # via litellm +tqdm==4.67.1 + # via + # huggingface-hub + # openai +typer-slim==0.20.0 + # via huggingface-hub typing-extensions==4.15.0 # via # alembic # fastapi + # huggingface-hub + # openai # pydantic # pydantic-core # sqlalchemy + # typer-slim # typing-inspection typing-inspection==0.4.2 # via # pydantic # pydantic-settings +urllib3==2.6.0 + # via requests uvicorn==0.38.0 # via -r requirements.txt uvloop==0.22.1 # via uvicorn watchfiles==1.1.1 # via uvicorn +weasyprint==67.0 + # via -r requirements.txt +webencodings==0.5.1 + # via + # cssselect2 + # tinycss2 + # tinyhtml5 websockets==15.0.1 # via uvicorn +yarl==1.22.0 + # via aiohttp +zipp==3.23.0 + # via importlib-metadata +zopfli==0.4.0 + # via fonttools diff --git a/backend/requirements.txt b/backend/requirements.txt index 5a7e1fb..73fda9c 100644 --- a/backend/requirements.txt +++ b/backend/requirements.txt @@ -16,3 +16,4 @@ litellm>=1.0.0 reportlab>=4.0.0 weasyprint>=66.0 jinja2>=3.1.6 +json-repair>=0.30.0 diff --git a/backend/uv.lock b/backend/uv.lock index 59900c4..8ea3d1b 100644 --- a/backend/uv.lock +++ b/backend/uv.lock @@ -456,6 +456,55 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/0f/e7/aa315e6a749d9b96c2504a1ba0ba031ba2d0517e972ce22682e3fccecb09/cssselect2-0.8.0-py3-none-any.whl", hash = "sha256:46fc70ebc41ced7a32cd42d58b1884d72ade23d21e5a4eaaf022401c13f0e76e", size = 15454, upload-time = "2025-03-05T14:46:06.463Z" }, ] +[[package]] +name = "deepaudit-backend" +version = "0.1.0" +source = { virtual = "." } +dependencies = [ + { name = "alembic" }, + { name = "asyncpg" }, + { name = "bcrypt" }, + { name = "email-validator" }, + { name = "fastapi" }, + { name = "greenlet" }, + { name = "httpx" }, + { name = "jinja2" }, + { name = "json-repair" }, + { name = "litellm" }, + { name = "passlib", extra = ["bcrypt"] }, + { name = "pydantic" }, + { name = "pydantic-settings" }, + { name = "python-jose", extra = ["cryptography"] }, + { name = "python-multipart" }, + { name = "reportlab" }, + { name = "sqlalchemy" }, + { name = "uvicorn", extra = ["standard"] }, + { name = "weasyprint" }, +] + +[package.metadata] +requires-dist = [ + { name = "alembic" }, + { name = "asyncpg" }, + { name = "bcrypt", specifier = "<5.0.0" }, + { name = "email-validator" }, + { name = "fastapi", specifier = ">=0.100.0" }, + { name = "greenlet" }, + { name = "httpx" }, + { name = "jinja2", specifier = ">=3.1.6" }, + { name = "json-repair", specifier = ">=0.30.0" }, + { name = "litellm", specifier = ">=1.0.0" }, + { name = "passlib", extras = ["bcrypt"] }, + { name = "pydantic", specifier = ">=2.0.0" }, + { name = "pydantic-settings" }, + { name = "python-jose", extras = ["cryptography"] }, + { name = "python-multipart" }, + { name = "reportlab", specifier = ">=4.0.0" }, + { name = "sqlalchemy", specifier = ">=2.0.0" }, + { name = "uvicorn", extras = ["standard"] }, + { name = "weasyprint", specifier = ">=66.0" }, +] + [[package]] name = "distro" version = "1.9.0" @@ -913,6 +962,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/97/9a/3c5391907277f0e55195550cf3fa8e293ae9ee0c00fb402fec1e38c0c82f/jiter-0.12.0-cp314-cp314t-win_arm64.whl", hash = "sha256:506c9708dd29b27288f9f8f1140c3cb0e3d8ddb045956d7757b1fa0e0f39a473", size = 185564, upload-time = "2025-11-09T20:48:50.376Z" }, ] +[[package]] +name = "json-repair" +version = "0.54.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/ff/05/9fbcd5ffab9c41455e7d80af65a90876718b8ea2fb4525e187ab11836dd4/json_repair-0.54.2.tar.gz", hash = "sha256:4b6b62ce17f1a505b220fa4aadba1fc37dc9c221544f158471efe3775620bad6", size = 38575, upload-time = "2025-11-25T19:31:22.768Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/53/3a/1b4df9adcd69fee9c9e4b439c13e8c866f2fae520054aede7030b2278be9/json_repair-0.54.2-py3-none-any.whl", hash = "sha256:be51cce5dca97e0c24ebdf61a1ede2449a8a7666012de99467bb7b0afb35179b", size = 29322, upload-time = "2025-11-25T19:31:21.492Z" }, +] + [[package]] name = "jsonschema" version = "4.25.1" @@ -2011,53 +2069,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/fa/a8/5b41e0da817d64113292ab1f8247140aac61cbf6cfd085d6a0fa77f4984f/websockets-15.0.1-py3-none-any.whl", hash = "sha256:f7a866fbc1e97b5c617ee4116daaa09b722101d4a3c170c787450ba409f9736f", size = 169743, upload-time = "2025-03-05T20:03:39.41Z" }, ] -[[package]] -name = "deepaudit-backend" -version = "0.1.0" -source = { virtual = "." } -dependencies = [ - { name = "alembic" }, - { name = "asyncpg" }, - { name = "bcrypt" }, - { name = "email-validator" }, - { name = "fastapi" }, - { name = "greenlet" }, - { name = "httpx" }, - { name = "jinja2" }, - { name = "litellm" }, - { name = "passlib", extra = ["bcrypt"] }, - { name = "pydantic" }, - { name = "pydantic-settings" }, - { name = "python-jose", extra = ["cryptography"] }, - { name = "python-multipart" }, - { name = "reportlab" }, - { name = "sqlalchemy" }, - { name = "uvicorn", extra = ["standard"] }, - { name = "weasyprint" }, -] - -[package.metadata] -requires-dist = [ - { name = "alembic" }, - { name = "asyncpg" }, - { name = "bcrypt", specifier = "<5.0.0" }, - { name = "email-validator" }, - { name = "fastapi", specifier = ">=0.100.0" }, - { name = "greenlet" }, - { name = "httpx" }, - { name = "jinja2", specifier = ">=3.1.6" }, - { name = "litellm", specifier = ">=1.0.0" }, - { name = "passlib", extras = ["bcrypt"] }, - { name = "pydantic", specifier = ">=2.0.0" }, - { name = "pydantic-settings" }, - { name = "python-jose", extras = ["cryptography"] }, - { name = "python-multipart" }, - { name = "reportlab", specifier = ">=4.0.0" }, - { name = "sqlalchemy", specifier = ">=2.0.0" }, - { name = "uvicorn", extras = ["standard"] }, - { name = "weasyprint", specifier = ">=66.0" }, -] - [[package]] name = "yarl" version = "1.22.0" diff --git a/frontend/package.json b/frontend/package.json index c778fe2..edd83d2 100644 --- a/frontend/package.json +++ b/frontend/package.json @@ -1,6 +1,6 @@ { "name": "deep-audit", - "version": "2.0.0-beta.5", + "version": "2.0.0-beta.6", "type": "module", "scripts": { "dev": "vite",