feat: add json-repair library as fallback for LLM response parsing

- Add json-repair>=0.30.0 to dependencies
- Add _repair_json_with_library method as final fallback in JSON parsing
- Update version to 2.0.0-beta.6
This commit is contained in:
lintsinghua 2025-12-08 21:45:58 +08:00
parent 4fa99b7615
commit 3a297dda3f
7 changed files with 261 additions and 57 deletions

View File

@ -8,7 +8,7 @@
<div align="center">
[![Version](https://img.shields.io/badge/version-2.0.0--beta.5-blue.svg)](https://github.com/lintsinghua/DeepAudit/releases)
[![Version](https://img.shields.io/badge/version-2.0.0--beta.6-blue.svg)](https://github.com/lintsinghua/DeepAudit/releases)
[![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
[![React](https://img.shields.io/badge/React-18-61dafb.svg)](https://reactjs.org/)
[![TypeScript](https://img.shields.io/badge/TypeScript-5.7-3178c6.svg)](https://www.typescriptlang.org/)

View File

@ -11,6 +11,13 @@ from .types import LLMConfig, LLMProvider, LLMMessage, LLMRequest, DEFAULT_MODEL
from .factory import LLMFactory
from app.core.config import settings
# json-repair 库用于修复损坏的 JSON
try:
from json_repair import repair_json
JSON_REPAIR_AVAILABLE = True
except ImportError:
JSON_REPAIR_AVAILABLE = False
logger = logging.getLogger(__name__)
@ -458,6 +465,8 @@ Please analyze the following code:
lambda: self._fix_truncated_json(clean_text(text)),
# 6. 激进修复后解析
lambda: json.loads(aggressive_fix_json(text)),
# 7. 使用 json-repair 库作为最终兜底方案
lambda: self._repair_json_with_library(text),
]
last_error = None
@ -574,6 +583,32 @@ Please analyze the following code:
json_str = re.sub(r',(\s*[}\]])', r'\1', json_str)
return json.loads(json_str)
def _repair_json_with_library(self, text: str) -> Dict[str, Any]:
    """Repair malformed JSON with the json-repair library (last-resort fallback).

    The candidate passed to the library is the slice of ``text`` from the
    first ``{`` up to and including the last ``}``. If no ``}`` appears
    after that first ``{``, everything from the ``{`` to the end of the
    text is used instead.

    Args:
        text: Raw text expected to contain a (possibly broken) JSON object.

    Returns:
        The dict produced by ``repair_json``.

    Raises:
        ValueError: If the json-repair library could not be imported, if
            ``text`` contains no ``{``, or if the repaired value is not a
            dict.
    """
    if not JSON_REPAIR_AVAILABLE:
        raise ValueError("json-repair library not available")

    # Locate the start of the JSON object; without one there is nothing to repair.
    opening = text.find('{')
    if opening < 0:
        raise ValueError("No JSON object found for repair")

    # Trim trailing non-JSON text when a closing brace exists after the opening one.
    closing = text.rfind('}')
    candidate = text[opening:closing + 1] if closing > opening else text[opening:]

    # return_objects=True asks the library for Python objects rather than a string.
    result = repair_json(candidate, return_objects=True)
    if not isinstance(result, dict):
        raise ValueError(f"json-repair returned unexpected type: {type(result)}")

    logger.info("✅ json-repair 库成功修复 JSON")
    return result
def _get_default_response(self) -> Dict[str, Any]:
"""返回默认响应"""
return {

View File

@ -22,4 +22,5 @@ dependencies = [
"reportlab>=4.0.0",
"weasyprint>=66.0",
"jinja2>=3.1.6",
"json-repair>=0.30.0",
]

View File

@ -1,5 +1,11 @@
# This file was autogenerated by uv via the following command:
# uv pip compile requirements.txt -o requirements-lock.txt
aiohappyeyeballs==2.6.1
# via aiohttp
aiohttp==3.13.2
# via litellm
aiosignal==1.4.0
# via aiohttp
alembic==1.17.2
# via -r requirements.txt
annotated-doc==0.0.4
@ -9,46 +15,135 @@ annotated-types==0.7.0
anyio==4.11.0
# via
# httpx
# openai
# starlette
# watchfiles
asyncpg==0.31.0
# via -r requirements.txt
bcrypt==5.0.0
# via passlib
attrs==25.4.0
# via
# aiohttp
# jsonschema
# referencing
bcrypt==4.3.0
# via
# -r requirements.txt
# passlib
brotli==1.2.0
# via fonttools
certifi==2025.11.12
# via
# httpcore
# httpx
# requests
cffi==2.0.0
# via cryptography
# via
# cryptography
# weasyprint
charset-normalizer==3.4.4
# via
# reportlab
# requests
click==8.3.1
# via uvicorn
# via
# litellm
# typer-slim
# uvicorn
cryptography==46.0.3
# via python-jose
cssselect2==0.8.0
# via weasyprint
distro==1.9.0
# via openai
dnspython==2.8.0
# via email-validator
ecdsa==0.19.1
# via python-jose
email-validator==2.3.0
# via -r requirements.txt
fastapi==0.122.0
# via -r requirements.txt
fastuuid==0.14.0
# via litellm
filelock==3.20.0
# via huggingface-hub
fonttools==4.61.0
# via weasyprint
frozenlist==1.8.0
# via
# aiohttp
# aiosignal
fsspec==2025.12.0
# via huggingface-hub
greenlet==3.3.0
# via -r requirements.txt
grpcio==1.67.1
# via litellm
h11==0.16.0
# via
# httpcore
# uvicorn
hf-xet==1.2.0
# via huggingface-hub
httpcore==1.0.9
# via httpx
httptools==0.7.1
# via uvicorn
httpx==0.28.1
# via -r requirements.txt
# via
# -r requirements.txt
# huggingface-hub
# litellm
# openai
huggingface-hub==1.2.1
# via tokenizers
idna==3.11
# via
# anyio
# email-validator
# httpx
# requests
# yarl
importlib-metadata==8.7.0
# via litellm
jinja2==3.1.6
# via
# -r requirements.txt
# litellm
jiter==0.12.0
# via openai
json-repair==0.54.2
# via -r requirements.txt
jsonschema==4.25.1
# via litellm
jsonschema-specifications==2025.9.1
# via jsonschema
litellm==1.80.8
# via -r requirements.txt
mako==1.3.10
# via alembic
markupsafe==3.0.3
# via mako
# via
# jinja2
# mako
multidict==6.7.0
# via
# aiohttp
# yarl
openai==2.9.0
# via litellm
packaging==25.0
# via huggingface-hub
passlib==1.7.4
# via -r requirements.txt
pillow==12.0.0
# via
# reportlab
# weasyprint
propcache==0.4.1
# via
# aiohttp
# yarl
pyasn1==0.6.1
# via
# python-jose
@ -59,13 +154,20 @@ pydantic==2.12.4
# via
# -r requirements.txt
# fastapi
# litellm
# openai
# pydantic-settings
pydantic-core==2.41.5
# via pydantic
pydantic-settings==2.12.0
# via -r requirements.txt
pydyf==0.12.1
# via weasyprint
pyphen==0.17.2
# via weasyprint
python-dotenv==1.2.1
# via
# litellm
# pydantic-settings
# uvicorn
python-jose==3.5.0
@ -73,36 +175,90 @@ python-jose==3.5.0
python-multipart==0.0.20
# via -r requirements.txt
pyyaml==6.0.3
# via uvicorn
# via
# huggingface-hub
# uvicorn
referencing==0.37.0
# via
# jsonschema
# jsonschema-specifications
regex==2025.11.3
# via tiktoken
reportlab==4.4.5
# via -r requirements.txt
requests==2.32.5
# via tiktoken
rpds-py==0.30.0
# via
# jsonschema
# referencing
rsa==4.9.1
# via python-jose
shellingham==1.5.4
# via huggingface-hub
six==1.17.0
# via ecdsa
sniffio==1.3.1
# via anyio
# via
# anyio
# openai
sqlalchemy==2.0.44
# via
# -r requirements.txt
# alembic
starlette==0.50.0
# via fastapi
tiktoken==0.12.0
# via litellm
tinycss2==1.5.1
# via
# cssselect2
# weasyprint
tinyhtml5==2.0.0
# via weasyprint
tokenizers==0.22.1
# via litellm
tqdm==4.67.1
# via
# huggingface-hub
# openai
typer-slim==0.20.0
# via huggingface-hub
typing-extensions==4.15.0
# via
# alembic
# fastapi
# huggingface-hub
# openai
# pydantic
# pydantic-core
# sqlalchemy
# typer-slim
# typing-inspection
typing-inspection==0.4.2
# via
# pydantic
# pydantic-settings
urllib3==2.6.0
# via requests
uvicorn==0.38.0
# via -r requirements.txt
uvloop==0.22.1
# via uvicorn
watchfiles==1.1.1
# via uvicorn
weasyprint==67.0
# via -r requirements.txt
webencodings==0.5.1
# via
# cssselect2
# tinycss2
# tinyhtml5
websockets==15.0.1
# via uvicorn
yarl==1.22.0
# via aiohttp
zipp==3.23.0
# via importlib-metadata
zopfli==0.4.0
# via fonttools

View File

@ -16,3 +16,4 @@ litellm>=1.0.0
reportlab>=4.0.0
weasyprint>=66.0
jinja2>=3.1.6
json-repair>=0.30.0

View File

@ -456,6 +456,55 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/0f/e7/aa315e6a749d9b96c2504a1ba0ba031ba2d0517e972ce22682e3fccecb09/cssselect2-0.8.0-py3-none-any.whl", hash = "sha256:46fc70ebc41ced7a32cd42d58b1884d72ade23d21e5a4eaaf022401c13f0e76e", size = 15454, upload-time = "2025-03-05T14:46:06.463Z" },
]
[[package]]
name = "deepaudit-backend"
version = "0.1.0"
source = { virtual = "." }
dependencies = [
{ name = "alembic" },
{ name = "asyncpg" },
{ name = "bcrypt" },
{ name = "email-validator" },
{ name = "fastapi" },
{ name = "greenlet" },
{ name = "httpx" },
{ name = "jinja2" },
{ name = "json-repair" },
{ name = "litellm" },
{ name = "passlib", extra = ["bcrypt"] },
{ name = "pydantic" },
{ name = "pydantic-settings" },
{ name = "python-jose", extra = ["cryptography"] },
{ name = "python-multipart" },
{ name = "reportlab" },
{ name = "sqlalchemy" },
{ name = "uvicorn", extra = ["standard"] },
{ name = "weasyprint" },
]
[package.metadata]
requires-dist = [
{ name = "alembic" },
{ name = "asyncpg" },
{ name = "bcrypt", specifier = "<5.0.0" },
{ name = "email-validator" },
{ name = "fastapi", specifier = ">=0.100.0" },
{ name = "greenlet" },
{ name = "httpx" },
{ name = "jinja2", specifier = ">=3.1.6" },
{ name = "json-repair", specifier = ">=0.30.0" },
{ name = "litellm", specifier = ">=1.0.0" },
{ name = "passlib", extras = ["bcrypt"] },
{ name = "pydantic", specifier = ">=2.0.0" },
{ name = "pydantic-settings" },
{ name = "python-jose", extras = ["cryptography"] },
{ name = "python-multipart" },
{ name = "reportlab", specifier = ">=4.0.0" },
{ name = "sqlalchemy", specifier = ">=2.0.0" },
{ name = "uvicorn", extras = ["standard"] },
{ name = "weasyprint", specifier = ">=66.0" },
]
[[package]]
name = "distro"
version = "1.9.0"
@ -913,6 +962,15 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/97/9a/3c5391907277f0e55195550cf3fa8e293ae9ee0c00fb402fec1e38c0c82f/jiter-0.12.0-cp314-cp314t-win_arm64.whl", hash = "sha256:506c9708dd29b27288f9f8f1140c3cb0e3d8ddb045956d7757b1fa0e0f39a473", size = 185564, upload-time = "2025-11-09T20:48:50.376Z" },
]
[[package]]
name = "json-repair"
version = "0.54.2"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/ff/05/9fbcd5ffab9c41455e7d80af65a90876718b8ea2fb4525e187ab11836dd4/json_repair-0.54.2.tar.gz", hash = "sha256:4b6b62ce17f1a505b220fa4aadba1fc37dc9c221544f158471efe3775620bad6", size = 38575, upload-time = "2025-11-25T19:31:22.768Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/53/3a/1b4df9adcd69fee9c9e4b439c13e8c866f2fae520054aede7030b2278be9/json_repair-0.54.2-py3-none-any.whl", hash = "sha256:be51cce5dca97e0c24ebdf61a1ede2449a8a7666012de99467bb7b0afb35179b", size = 29322, upload-time = "2025-11-25T19:31:21.492Z" },
]
[[package]]
name = "jsonschema"
version = "4.25.1"
@ -2011,53 +2069,6 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/fa/a8/5b41e0da817d64113292ab1f8247140aac61cbf6cfd085d6a0fa77f4984f/websockets-15.0.1-py3-none-any.whl", hash = "sha256:f7a866fbc1e97b5c617ee4116daaa09b722101d4a3c170c787450ba409f9736f", size = 169743, upload-time = "2025-03-05T20:03:39.41Z" },
]
[[package]]
name = "deepaudit-backend"
version = "0.1.0"
source = { virtual = "." }
dependencies = [
{ name = "alembic" },
{ name = "asyncpg" },
{ name = "bcrypt" },
{ name = "email-validator" },
{ name = "fastapi" },
{ name = "greenlet" },
{ name = "httpx" },
{ name = "jinja2" },
{ name = "litellm" },
{ name = "passlib", extra = ["bcrypt"] },
{ name = "pydantic" },
{ name = "pydantic-settings" },
{ name = "python-jose", extra = ["cryptography"] },
{ name = "python-multipart" },
{ name = "reportlab" },
{ name = "sqlalchemy" },
{ name = "uvicorn", extra = ["standard"] },
{ name = "weasyprint" },
]
[package.metadata]
requires-dist = [
{ name = "alembic" },
{ name = "asyncpg" },
{ name = "bcrypt", specifier = "<5.0.0" },
{ name = "email-validator" },
{ name = "fastapi", specifier = ">=0.100.0" },
{ name = "greenlet" },
{ name = "httpx" },
{ name = "jinja2", specifier = ">=3.1.6" },
{ name = "litellm", specifier = ">=1.0.0" },
{ name = "passlib", extras = ["bcrypt"] },
{ name = "pydantic", specifier = ">=2.0.0" },
{ name = "pydantic-settings" },
{ name = "python-jose", extras = ["cryptography"] },
{ name = "python-multipart" },
{ name = "reportlab", specifier = ">=4.0.0" },
{ name = "sqlalchemy", specifier = ">=2.0.0" },
{ name = "uvicorn", extras = ["standard"] },
{ name = "weasyprint", specifier = ">=66.0" },
]
[[package]]
name = "yarl"
version = "1.22.0"

View File

@ -1,6 +1,6 @@
{
"name": "deep-audit",
"version": "2.0.0-beta.5",
"version": "2.0.0-beta.6",
"type": "module",
"scripts": {
"dev": "vite",