Files
tech/1_анализ_главы/run_chapter_analysis_ollama.py
2026-02-01 19:09:26 +03:00

157 lines
5.3 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env python3
"""
Полный анализ главы по блокам через Ollama: framework → insights → application → limitations.
Вход: вход_главы.json, промпты extract_framework_v2.txt, extract_insights_v3.txt,
extract_application_v2.txt, extract_limitations_v3.txt.
Выход: только merge.json (объединённый JSON всех четырёх блоков).
"""
import json
import re
import sys
import time
import urllib.request
from pathlib import Path
# Base URL of the Ollama HTTP server that receives the chat requests.
OLLAMA_URL: str = "http://localhost:11434"

# Model tag sent in the "model" field of every request.
MODEL: str = "qwen3:14b"

# Directory that contains this script; the input file, the prompt templates
# and the output file are all resolved relative to it.
DIR: Path = Path(__file__).resolve().parent

# Generation settings forwarded unchanged in the request's "options" field.
OLLAMA_OPTIONS: dict[str, float] = {
    "temperature": 0.3,
    "num_ctx": 8500,
    "num_predict": 4096,
    "repeat_penalty": 1.1,
}

# Processing order of the analysis blocks. Each entry is
# (block name, prompt template file, whether the JSON accumulated from the
# earlier blocks is substituted into the prompt).
BLOCKS: list[tuple[str, str, bool]] = [
    # The first block runs without previous_blocks_json.
    ("framework", "extract_framework_v2.txt", False),
    ("insights", "extract_insights_v3.txt", True),
    ("application", "extract_application_v2.txt", True),
    ("limitations", "extract_limitations_v3.txt", True),
]
def load_input() -> dict:
    """Read the chapter input file and return its parsed JSON content.

    The file ``вход_главы.json`` is looked up in ``DIR`` and decoded as UTF-8.
    """
    input_path = DIR / "вход_главы.json"
    return json.loads(input_path.read_text(encoding="utf-8"))
def load_prompt(filename: str) -> str:
    """Return the UTF-8 text of the prompt template ``filename`` found in ``DIR``."""
    template_path = DIR / filename
    return template_path.read_text(encoding="utf-8")
def substitute_prompt(
prompt: str,
data: dict,
previous_blocks_json: str | None = None,
) -> str:
"""Подставляет в промпт поля главы и при необходимости накопленный JSON."""
out = (
prompt.replace("{book_title}", data.get("book_title", ""))
.replace("{chapter_title}", data.get("chapter_title", ""))
.replace("{chapter_text}", data.get("chapter_text", ""))
)
if previous_blocks_json is not None:
out = out.replace("{previous_blocks_json}", previous_blocks_json)
return out
def extract_json_from_response(text: str) -> dict:
    """Parse the JSON value contained in a model response.

    The stripped response is first parsed as-is. Only if that fails is the
    content of the first Markdown code fence (three backticks, optionally
    tagged ``json``) extracted and parsed instead. Trying the whole text
    first keeps a valid JSON reply intact when one of its string values
    itself contains triple backticks.

    Args:
        text: Raw response text from the model.

    Returns:
        The decoded JSON value. Callers that need an object should check the
        type, since any JSON value (for example a list) can be returned.

    Raises:
        json.JSONDecodeError: If neither the whole text nor the fenced
            content is valid JSON.
    """
    text = text.strip()
    try:
        return json.loads(text)
    except json.JSONDecodeError:
        fenced = re.search(r"```(?:json)?\s*([\s\S]*?)\s*```", text)
        if fenced is None:
            # Nothing to fall back on: surface the original parse error.
            raise
        return json.loads(fenced.group(1).strip())
def call_ollama(prompt: str) -> str:
    """Send ``prompt`` to the Ollama ``/api/chat`` endpoint and return the reply text.

    The request is a single non-streaming user message for ``MODEL`` with
    ``format`` set to ``"json"``, the generation settings from
    ``OLLAMA_OPTIONS`` and ``keep_alive`` set to 0. No timeout is applied to
    the HTTP call.

    Args:
        prompt: Full prompt text, sent as the content of the user message.

    Returns:
        The ``message.content`` field of the JSON response, or an empty
        string when that field is absent.

    Raises:
        RuntimeError: If the server answers with an HTTP error status. The
            message includes the status code, the reason and up to the first
            1000 bytes of the response body.
    """
    # Imported explicitly instead of relying on urllib.request having loaded
    # the urllib.error submodule as a side effect.
    from urllib.error import HTTPError

    payload = {
        "model": MODEL,
        "messages": [{"role": "user", "content": prompt}],
        "stream": False,
        "format": "json",
        "options": OLLAMA_OPTIONS,
        "keep_alive": 0,
    }
    body = json.dumps(payload, ensure_ascii=False).encode("utf-8")
    req = urllib.request.Request(
        f"{OLLAMA_URL}/api/chat",
        data=body,
        headers={"Content-Type": "application/json"},
        method="POST",
    )
    try:
        with urllib.request.urlopen(req, timeout=None) as resp:
            data = json.load(resp)
    except HTTPError as e:
        body_b = b""
        if e.fp:
            try:
                body_b = e.fp.read()[:1000]
            except Exception:
                # Best effort only: the error body is diagnostic detail, and
                # failing to read it must not hide the HTTP error itself.
                pass
        raise RuntimeError(
            f"Ollama HTTP {e.code}: {e.reason}. Body: {body_b.decode('utf-8', errors='replace')}"
        ) from e
    return data.get("message", {}).get("content", "")
def main() -> int:
    """Generate every block listed in ``BLOCKS`` in order and write ``merge.json``.

    For each block the prompt template is loaded, filled with the chapter
    fields (and, where the block asks for it, the JSON accumulated so far),
    sent to Ollama, and the parsed reply is merged into one dictionary. The
    merged dictionary is written to ``merge.json`` in ``DIR`` only after all
    blocks have succeeded.

    Returns:
        0 on success; 1 if an Ollama call fails, a reply is not valid JSON,
        or a reply is not a JSON object. Errors are reported on stderr.
    """
    print("Загрузка вход_главы.json и промптов...")
    data = load_input()
    merged: dict = {}
    # Derived from BLOCKS so the progress counter stays correct if the list changes.
    total = len(BLOCKS)
    for i, (block_name, prompt_file, use_previous) in enumerate(BLOCKS, start=1):
        prompt_tpl = load_prompt(prompt_file)
        previous_json = (
            json.dumps(merged, ensure_ascii=False, indent=2) if use_previous else None
        )
        prompt = substitute_prompt(prompt_tpl, data, previous_json)

        print(f"[{i}/{total}] Блок «{block_name}» — вызов Ollama {MODEL}...")
        t0 = time.monotonic()
        try:
            raw = call_ollama(prompt)
        except Exception as e:
            # Top-level boundary: any failure of the call aborts the run.
            print(f"Ошибка вызова Ollama на блоке «{block_name}»: {e}", file=sys.stderr)
            return 1
        elapsed = time.monotonic() - t0
        print(f" Ответ за {elapsed:.1f} сек ({elapsed / 60:.1f} мин)")

        try:
            block_data = extract_json_from_response(raw)
        except json.JSONDecodeError as e:
            print(
                f"Не удалось распарсить JSON в блоке «{block_name}»: {e}",
                file=sys.stderr,
            )
            print("Первые 500 символов ответа:", raw[:500], file=sys.stderr)
            return 1
        if not isinstance(block_data, dict):
            print(
                f"Блок «{block_name}» вернул не объект: {type(block_data).__name__}",
                file=sys.stderr,
            )
            return 1

        # update() replaces same-named keys from earlier blocks; report it
        # instead of losing earlier results silently.
        overwritten = sorted(merged.keys() & block_data.keys())
        if overwritten:
            print(
                f"Предупреждение: блок «{block_name}» перезаписывает ключи: {', '.join(overwritten)}",
                file=sys.stderr,
            )
        merged.update(block_data)

    out_path = DIR / "merge.json"
    with open(out_path, "w", encoding="utf-8") as f:
        json.dump(merged, f, ensure_ascii=False, indent=2)
    print(f"Записано: {out_path}")
    return 0
if __name__ == "__main__":
    # Run only when executed as a script; main()'s return value becomes the
    # process exit status.
    raise SystemExit(main())