tech/1_анализ_главы/run_framework_ollama.py

#!/usr/bin/env python3
"""
Один запуск блока framework через Ollama (qwen3:14b) для сравнения с эталоном выход_frame.json.
Вход: вход_главы.json, промпт extract_framework_v2.txt.
Выход: выход_frame_ollama.json и краткое сравнение с выход_frame.json.
"""

import json
import re
import sys
import time
import urllib.request
from pathlib import Path

# Directory holding this script; input and output files are resolved against it.
DIR = Path(__file__).resolve().parent

# Model tag to request and the base URL of the Ollama server.
MODEL = "qwen3:14b"
OLLAMA_URL = "http://localhost:11434"

# Generation options for stable structured output (JSON framework).
OLLAMA_OPTIONS = {
    # Lower = more deterministic, less "creativity" and language switching.
    "temperature": 0.3,
    # Context window sized for a long chapter.
    "num_ctx": 8500,
    # Response token limit sized for a complete frame.
    "num_predict": 4096,
    "repeat_penalty": 1.1,
}


def load_input() -> dict:
    """Parse and return the contents of ``вход_главы.json`` found in ``DIR``.

    The file is read as UTF-8 and decoded with the standard JSON parser.
    """
    source = DIR / "вход_главы.json"
    return json.loads(source.read_text(encoding="utf-8"))


def load_prompt() -> str:
    """Return the full UTF-8 text of ``extract_framework_v2.txt`` found in ``DIR``."""
    template_path = DIR / "extract_framework_v2.txt"
    return template_path.read_text(encoding="utf-8")


def substitute_prompt(prompt: str, data: dict) -> str:
    """Fill the ``{book_title}``, ``{chapter_title}`` and ``{chapter_text}`` placeholders.

    All placeholders are replaced in a single pass over ``prompt``, so text
    inserted for one field is never rescanned: a book title that happens to
    contain ``{chapter_text}`` stays literal instead of being expanded again.

    Args:
        prompt: Template text containing zero or more of the placeholders.
        data: Mapping that may provide ``book_title``, ``chapter_title`` and
            ``chapter_text``. A missing key or a ``None`` value is substituted
            as an empty string; any other value is converted with ``str``.

    Returns:
        The prompt with every known placeholder replaced. Any other
        ``{...}`` sequence is left untouched.
    """
    fields = ("book_title", "chapter_title", "chapter_text")
    values = {}
    for name in fields:
        raw = data.get(name)
        values[name] = "" if raw is None else str(raw)

    placeholder = re.compile(r"\{(" + "|".join(fields) + r")\}")
    # A callable replacement keeps backslashes in the inserted text literal
    # (a replacement *string* would be scanned for escapes such as "\1").
    return placeholder.sub(lambda found: values[found.group(1)], prompt)


def extract_json_from_response(text: str) -> dict:
    """Extract the JSON payload from a model response.

    The whole (stripped) response is parsed first. Only when that fails is the
    first Markdown code fence (three backticks, optionally tagged ``json``)
    looked up and its contents parsed instead. Parsing the full text first
    matters because a valid JSON document may itself contain a pair of code
    fences inside a string value; extracting the fenced span in that case
    would discard the real document.

    Args:
        text: Raw response text returned by the model.

    Returns:
        The decoded JSON value.

    Raises:
        json.JSONDecodeError: If neither the full text nor the fenced
            fragment is valid JSON.
    """
    text = text.strip()
    try:
        return json.loads(text)
    except json.JSONDecodeError:
        fenced = re.search(r"```(?:json)?\s*([\s\S]*?)\s*```", text)
        if fenced is None:
            # Nothing else to try: surface the original parse error.
            raise
        return json.loads(fenced.group(1).strip())


def call_ollama(prompt: str) -> str:
    """Send ``prompt`` as a single user message to Ollama's ``/api/chat`` endpoint.

    The request is non-streaming, asks for JSON-formatted output and uses the
    module-level ``MODEL`` and ``OLLAMA_OPTIONS``.

    Args:
        prompt: Complete prompt text to send as the user message.

    Returns:
        The ``message.content`` field of the response, or an empty string when
        the response carries no message or no content.

    Raises:
        RuntimeError: If the server answers with an HTTP error status; the
            message includes up to 1000 characters of the response body.
        Exception: Other failures (e.g. connection errors or an undecodable
            response body) propagate unchanged.
    """
    # Imported explicitly: only urllib.request is imported at file level, and
    # relying on it to pull in urllib.error is an implementation detail.
    from urllib.error import HTTPError

    payload = json.dumps(
        {
            "model": MODEL,
            "messages": [{"role": "user", "content": prompt}],
            "stream": False,
            "format": "json",
            "options": OLLAMA_OPTIONS,
            "keep_alive": 0,  # unload the model from memory right after the reply
        },
        ensure_ascii=False,
    ).encode("utf-8")
    request = urllib.request.Request(
        f"{OLLAMA_URL}/api/chat",
        data=payload,
        headers={"Content-Type": "application/json"},
        method="POST",
    )
    try:
        # Timeout disabled: wait for generation to finish (can take 10+ minutes on CPU).
        with urllib.request.urlopen(request, timeout=None) as resp:
            data = json.load(resp)
    except HTTPError as e:
        error_body = ""
        if e.fp:
            try:
                error_body = e.fp.read().decode("utf-8", errors="replace")[:1000]
            except Exception:
                # Best effort only: the body is extra diagnostics, so a failure
                # to read it must not mask the HTTP error itself.
                pass
        raise RuntimeError(
            f"Ollama HTTP {e.code}: {e.reason}. Body: {error_body}"
        ) from e

    # Tolerate a missing or null "message"/"content" so the declared str
    # return type always holds.
    message = data.get("message") or {}
    return message.get("content") or ""


def _section_size(frame: object, key: str) -> int:
    """Return the number of entries in ``frame["framework"][key]``.

    Returns 0 when ``frame`` or its ``framework`` entry is not a dict, or when
    the requested section is missing or is neither a dict nor a list.
    """
    framework = frame.get("framework") if isinstance(frame, dict) else None
    section = framework.get(key) if isinstance(framework, dict) else None
    return len(section) if isinstance(section, (dict, list)) else 0


def main() -> int:
    """Run one framework extraction through Ollama and compare it with the reference.

    Steps: load the chapter input and the prompt template, call the model,
    parse the JSON answer, write it to ``выход_frame_ollama.json`` and, if
    ``выход_frame.json`` exists, print how many ``principles`` and ``terms``
    each of the two files contains.

    Returns:
        0 on success; 1 if the Ollama call fails or the answer is not valid JSON.
    """
    print("Загрузка вход_главы.json и промпта...")
    inp = load_input()
    prompt_tpl = load_prompt()
    prompt = substitute_prompt(prompt_tpl, inp)

    print(f"Вызов Ollama {MODEL} (таймаут отключён, ждём завершения)...")
    t0 = time.monotonic()
    try:
        raw = call_ollama(prompt)
    except Exception as e:
        print(f"Ошибка вызова Ollama: {e}", file=sys.stderr)
        return 1
    elapsed = time.monotonic() - t0
    print(f"Ответ получен за {elapsed:.1f} сек ({elapsed / 60:.1f} мин)")

    try:
        result = extract_json_from_response(raw)
    except json.JSONDecodeError as e:
        print(f"Не удалось распарсить JSON из ответа: {e}", file=sys.stderr)
        print("Первые 500 символов ответа:", raw[:500], file=sys.stderr)
        return 1

    out_path = DIR / "выход_frame_ollama.json"
    with open(out_path, "w", encoding="utf-8") as f:
        json.dump(result, f, ensure_ascii=False, indent=2)
    print(f"Результат сохранён: {out_path}")

    # Comparison with the reference output (skipped when no reference exists).
    ref_path = DIR / "выход_frame.json"
    if not ref_path.exists():
        return 0
    with open(ref_path, encoding="utf-8") as f:
        ref = json.load(f)

    # Model output is not guaranteed to follow the expected schema; counting
    # through _section_size keeps a malformed "framework" from crashing the
    # script after the result has already been saved.
    principles_r = _section_size(ref, "principles")
    principles_o = _section_size(result, "principles")
    terms_r = _section_size(ref, "terms")
    terms_o = _section_size(result, "terms")
    print("\n--- Сравнение с эталоном выход_frame.json ---")
    print(f"  principles: эталон {principles_r}, Ollama {principles_o}")
    print(f"  terms:      эталон {terms_r}, Ollama {terms_o}")
    return 0


# Script entry point: main()'s return value becomes the process exit status.
if __name__ == "__main__":
    raise SystemExit(main())