237 lines
8.1 KiB
Python
237 lines
8.1 KiB
Python
#!/usr/bin/env python3
|
||
"""
|
||
Валидация анализа главы по блокам через Ollama: framework → insights → application → limitations.
|
||
Вход: merge.json (анализ из этапа 1), вход_главы.json (текст главы и метаданные).
|
||
Выход: один JSON-файл со статусами по каждому блоку (verdict, score, hallucinations, missing_key_points).
|
||
"""
|
||
|
||
import argparse
|
||
import json
|
||
import re
|
||
import sys
|
||
import time
|
||
import urllib.request
|
||
from pathlib import Path
|
||
|
||
# Base URL of the Ollama server and the model name sent with every request.
OLLAMA_URL = "http://localhost:11434"
MODEL = "qwen3:14b"

# Directory containing this script; prompt templates are read from here and
# the default output file is written here.
DIR = Path(__file__).resolve().parent

# Default inputs live in the sibling stage-1 directory.
_STAGE1_DIR = DIR.parent / "1_анализ_главы"
DEFAULT_MERGE = _STAGE1_DIR / "merge.json"
DEFAULT_CHAPTER = _STAGE1_DIR / "вход_главы.json"

# Passed unchanged as the "options" field of the chat request.
OLLAMA_OPTIONS = dict(
    temperature=0.2,
    num_ctx=8500,
    num_predict=2048,
    repeat_penalty=1.1,
)

# One entry per validated block:
# (key in merge.json, prompt template file name, placeholder name in that template).
BLOCKS = [
    (name, f"validate_{name}.txt", f"{name}_json")
    for name in ("framework", "insights", "application", "limitations")
]
|
||
|
||
|
||
def load_json(path: Path) -> dict:
    """Load and parse a JSON document from a file.

    The file is decoded as UTF-8. A leading byte-order mark, which some
    editors write and which ``json`` rejects, is dropped by the ``utf-8-sig``
    codec; files without a BOM are read exactly as plain UTF-8.

    Args:
        path: Location of the JSON file.

    Returns:
        The parsed top-level JSON value. Callers in this file expect an
        object (``dict``), but no type check is performed here.

    Raises:
        OSError: The file cannot be opened or read.
        json.JSONDecodeError: The content is not valid JSON.
    """
    with open(path, encoding="utf-8-sig") as f:
        return json.load(f)
|
||
|
||
|
||
def load_prompt(filename: str) -> str:
    """Return the full text of a prompt template stored next to this script.

    Args:
        filename: Template file name, resolved relative to ``DIR``.

    Returns:
        The file content decoded as UTF-8.
    """
    template_path = DIR / filename
    return template_path.read_text(encoding="utf-8")
|
||
|
||
|
||
def substitute_prompt(
    prompt: str,
    book_title: str,
    chapter_title: str,
    chapter_text: str,
    block_json: str,
    block_placeholder: str,
) -> str:
    """Fill a prompt template with chapter fields and the block's JSON.

    All placeholders are replaced in a single pass over ``prompt``, so text
    that has just been inserted is never scanned again. A chapter text that
    happens to contain ``{framework_json}`` (or a title containing
    ``{chapter_text}``) is therefore inserted verbatim instead of being
    rewritten by a later replacement.

    Args:
        prompt: Template text containing ``{book_title}``, ``{chapter_title}``,
            ``{chapter_text}`` and the block placeholder.
        book_title: Replacement for ``{book_title}``.
        chapter_title: Replacement for ``{chapter_title}``.
        chapter_text: Replacement for ``{chapter_text}``.
        block_json: Replacement for ``block_placeholder``.
        block_placeholder: Literal placeholder for the block JSON, braces
            included, e.g. ``{framework_json}``. An empty string is ignored.

    Returns:
        The prompt with every known placeholder substituted; any other text,
        including unknown ``{...}`` sequences, is left untouched.
    """
    # The fixed placeholders come last so that they take precedence if
    # block_placeholder happens to be identical to one of them.
    values = {
        block_placeholder: block_json,
        "{chapter_text}": chapter_text,
        "{chapter_title}": chapter_title,
        "{book_title}": book_title,
    }
    # An empty placeholder would match at every position of the template.
    values.pop("", None)

    # Longest alternative first, so a placeholder that is a prefix of another
    # one cannot shadow it.
    alternatives = sorted(values, key=len, reverse=True)
    pattern = re.compile("|".join(re.escape(key) for key in alternatives))

    # A callable replacement keeps backslashes in the values literal.
    return pattern.sub(lambda found: values[found.group(0)], prompt)
|
||
|
||
|
||
def extract_json_from_response(text: str) -> dict:
    """Extract the JSON payload from a model reply.

    The reply is parsed in two stages:

    1. If it contains a fenced block (```` ```json ... ``` ```` or a bare
       ```` ``` ```` fence), the fence content is parsed; otherwise the whole
       stripped reply is parsed.
    2. Only if stage 1 fails: ``<think>...</think>`` sections are removed and
       the first JSON object that decodes successfully, starting from any
       ``{`` in the remaining text, is returned. This recovers replies where
       the JSON is surrounded by explanatory prose.

    Args:
        text: Raw reply text from the model.

    Returns:
        The decoded JSON value. Stage 1 may yield any JSON type (the caller
        checks for ``dict``); stage 2 always yields a ``dict``.

    Raises:
        json.JSONDecodeError: No JSON could be decoded; the error from
            stage 1 is re-raised.
    """
    stripped = text.strip()
    candidate = stripped
    fenced = re.search(r"```(?:json)?\s*([\s\S]*?)\s*```", stripped)
    if fenced:
        candidate = fenced.group(1).strip()

    try:
        return json.loads(candidate)
    except json.JSONDecodeError as first_error:
        # Fallback path: runs only for replies the strict parse rejected, so
        # replies that parsed before are handled exactly as before.
        searchable = re.sub(r"<think>[\s\S]*?</think>", "", stripped)
        decoder = json.JSONDecoder()
        start = searchable.find("{")
        while start != -1:
            try:
                # raw_decode tolerates trailing text after the object.
                return decoder.raw_decode(searchable, start)[0]
            except json.JSONDecodeError:
                start = searchable.find("{", start + 1)
        raise first_error
|
||
|
||
|
||
def call_ollama(prompt: str) -> str:
    """Send one user message to Ollama's ``/api/chat`` and return the reply text.

    The request is non-streaming, asks for JSON-formatted output and uses the
    module-level ``MODEL``, ``OLLAMA_URL`` and ``OLLAMA_OPTIONS``.

    Args:
        prompt: Full prompt text, sent as a single ``user`` message.

    Returns:
        The ``message.content`` string of the response, or ``""`` when the
        response has no such string (missing or null field, or an unexpected
        response shape).

    Raises:
        RuntimeError: The server answered with an HTTP error status; the
            message carries the status, the reason and up to 1000 bytes of
            the response body.
        Exception: Any other failure from ``urllib`` (for example a
            connection error) or from decoding the response body propagates
            unchanged.
    """
    # Imported explicitly: the file only imports urllib.request, and the
    # availability of urllib.error through it is a side effect.
    from urllib.error import HTTPError

    payload = {
        "model": MODEL,
        "messages": [{"role": "user", "content": prompt}],
        "stream": False,
        "format": "json",
        "options": OLLAMA_OPTIONS,
        # NOTE(review): per the Ollama API docs, 0 unloads the model right
        # after the response — confirm this is intended between blocks.
        "keep_alive": 0,
    }
    body = json.dumps(payload, ensure_ascii=False).encode("utf-8")
    req = urllib.request.Request(
        f"{OLLAMA_URL}/api/chat",
        data=body,
        headers={"Content-Type": "application/json"},
        method="POST",
    )

    try:
        # No timeout: the call blocks until the server answers.
        with urllib.request.urlopen(req, timeout=None) as resp:
            data = json.load(resp)
    except HTTPError as e:
        body_b = b""
        if e.fp:
            try:
                body_b = e.fp.read()[:1000]
            except Exception:
                # Best effort only: the body is extra diagnostics, and a
                # failure to read it must not hide the HTTP error itself.
                pass
        raise RuntimeError(
            f"Ollama HTTP {e.code}: {e.reason}. Body: {body_b.decode('utf-8', errors='replace')}"
        ) from e

    # Tolerate null or malformed fields so the declared ``str`` return type holds.
    message = data.get("message") if isinstance(data, dict) else None
    content = message.get("content") if isinstance(message, dict) else None
    return content if isinstance(content, str) else ""
|
||
|
||
|
||
def _failed_block_status(verdict: str, error: str) -> dict:
    """Build the result entry for a block that produced no model verdict."""
    return {
        "verdict": verdict,
        "score": None,
        "hallucinations": [],
        "missing_key_points": [],
        "error": error,
    }


def main() -> int:
    """Validate the four analysis blocks in order and write one JSON status file.

    For every entry of ``BLOCKS`` the matching part of merge.json is inserted
    into its prompt template, sent to Ollama, and the reply is reduced to
    ``verdict`` / ``score`` / ``hallucinations`` / ``missing_key_points``.
    A failure in one block (missing block, unreadable prompt file, request
    error, undecodable or non-object reply) is recorded in that block's entry
    and the remaining blocks are still processed.

    Returns:
        Process exit status: ``1`` when an input file is missing, unreadable
        or not a JSON object; ``0`` once the status file has been written
        (even if individual blocks are marked ``error`` or ``skipped``).
    """
    parser = argparse.ArgumentParser(
        description="Валидация анализа главы по блокам через Ollama. Выход — JSON со статусами."
    )
    parser.add_argument(
        "--merge",
        type=Path,
        default=DEFAULT_MERGE,
        help=f"Путь к merge.json (по умолчанию: {DEFAULT_MERGE})",
    )
    parser.add_argument(
        "--chapter",
        type=Path,
        default=DEFAULT_CHAPTER,
        help=f"Путь к вход_главы.json (по умолчанию: {DEFAULT_CHAPTER})",
    )
    parser.add_argument(
        "-o",
        "--output",
        type=Path,
        default=DIR / "validation_status.json",
        help="Путь к выходному JSON со статусами (по умолчанию: validation_status.json)",
    )
    args = parser.parse_args()

    for required in (args.merge, args.chapter):
        if not required.is_file():
            print(f"Файл не найден: {required}", file=sys.stderr)
            return 1

    print("Загрузка merge.json и вход_главы.json...")
    try:
        merge = load_json(args.merge)
        chapter = load_json(args.chapter)
    except (OSError, ValueError) as e:
        # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
        print(f"Не удалось прочитать входной JSON: {e}", file=sys.stderr)
        return 1
    if not isinstance(merge, dict) or not isinstance(chapter, dict):
        print("Входные файлы должны содержать JSON-объекты.", file=sys.stderr)
        return 1

    # A JSON null would otherwise reach str.replace() and raise TypeError.
    book_title = str(chapter.get("book_title") or "")
    chapter_title = str(chapter.get("chapter_title") or "")
    chapter_text = str(chapter.get("chapter_text") or "")

    results: dict = {}

    for block_name, prompt_file, json_placeholder in BLOCKS:
        block_data = merge.get(block_name)
        if block_data is None:
            print(f"Блок «{block_name}» отсутствует в merge.json, пропуск.", file=sys.stderr)
            results[block_name] = _failed_block_status("skipped", "block not found in merge")
            continue

        try:
            prompt_tpl = load_prompt(prompt_file)
        except OSError as e:
            # A missing template must not discard the results of other blocks.
            print(f"Не удалось загрузить промпт {prompt_file}: {e}", file=sys.stderr)
            results[block_name] = _failed_block_status("error", f"prompt load: {e}")
            continue

        block_json_str = json.dumps(block_data, ensure_ascii=False, indent=2)
        placeholder = "{" + json_placeholder + "}"  # {framework_json}, {insights_json}, ...
        prompt = substitute_prompt(
            prompt_tpl,
            book_title,
            chapter_title,
            chapter_text,
            block_json_str,
            placeholder,
        )

        print(f"Валидация блока «{block_name}»...")
        t0 = time.monotonic()
        try:
            raw = call_ollama(prompt)
        except Exception as e:
            # Boundary of the external call: any failure is recorded for this
            # block and the run continues with the next one.
            print(f"Ошибка вызова Ollama на блоке «{block_name}»: {e}", file=sys.stderr)
            results[block_name] = _failed_block_status("error", str(e))
            continue
        elapsed = time.monotonic() - t0
        print(f" Ответ за {elapsed:.1f} сек ({elapsed / 60:.1f} мин)")

        if not isinstance(raw, str):
            # A null content would break .strip() and slicing below; an empty
            # string is reported as a JSON decode error instead.
            raw = ""

        try:
            block_result = extract_json_from_response(raw)
        except json.JSONDecodeError as e:
            print(
                f"Не удалось распарсить JSON в блоке «{block_name}»: {e}",
                file=sys.stderr,
            )
            print("Первые 500 символов ответа:", raw[:500], file=sys.stderr)
            results[block_name] = _failed_block_status("error", f"JSON decode: {e}")
            continue

        if not isinstance(block_result, dict):
            results[block_name] = _failed_block_status(
                "error", f"expected dict, got {type(block_result).__name__}"
            )
            continue

        results[block_name] = {
            "verdict": block_result.get("verdict", "unknown"),
            "score": block_result.get("score"),
            "hallucinations": block_result.get("hallucinations", []),
            "missing_key_points": block_result.get("missing_key_points", []),
        }

    args.output.parent.mkdir(parents=True, exist_ok=True)
    with open(args.output, "w", encoding="utf-8") as f:
        json.dump(results, f, ensure_ascii=False, indent=2)
    print(f"Записано: {args.output}")
    return 0
|
||
|
||
|
||
# Script entry point: main()'s return value becomes the process exit status.
if __name__ == "__main__":
    raise SystemExit(main())
|