fix

2026-02-01 19:09:26 +03:00
parent 0b01f30021
commit e238813980
3 changed files with 157 additions and 0 deletions
--- a/1_анализ_главы/merge_jsons_old.py
+++ b/1_анализ_главы/merge_jsons_old.py
--- a/1_анализ_главы/run_chapter_analysis_ollama.py
+++ b/1_анализ_главы/run_chapter_analysis_ollama.py
@@ -0,0 +1,156 @@
 #!/usr/bin/env python3
 """
 Полный анализ главы по блокам через Ollama: framework → insights → application → limitations.
 Вход: вход_главы.json, промпты extract_framework_v2.txt, extract_insights_v3.txt,
      extract_application_v2.txt, extract_limitations_v3.txt.
 Выход: только merge.json (объединённый JSON всех четырёх блоков).
 """
 import json
 import re
 import sys
 import time
 import urllib.request
 from pathlib import Path
 # Base URL of the Ollama server and the model tag sent in each request.
 OLLAMA_URL = "http://localhost:11434"
 MODEL = "qwen3:14b"
 # Directory of this script; the input, prompt and output files are resolved against it.
 DIR = Path(__file__).resolve().parents[0]
 # Generation settings passed unchanged in the "options" field of the chat request.
 OLLAMA_OPTIONS = dict(
    temperature=0.3,
    num_ctx=8500,
    num_predict=4096,
    repeat_penalty=1.1,
 )
 # One entry per block, in generation order:
 # (block name, prompt template file, whether the JSON accumulated so far is substituted in).
 BLOCKS = [
    ("framework", "extract_framework_v2.txt", False),  # no previous_blocks_json
    ("insights", "extract_insights_v3.txt", True),
    ("application", "extract_application_v2.txt", True),
    ("limitations", "extract_limitations_v3.txt", True),
 ]
 def load_input() -> dict:
    """Read ``вход_главы.json`` from ``DIR`` (UTF-8) and return the parsed JSON."""
    source = DIR / "вход_главы.json"
    return json.loads(source.read_text(encoding="utf-8"))
 def load_prompt(filename: str) -> str:
    """Return the UTF-8 text of the prompt template ``filename`` located in ``DIR``."""
    template_path = DIR / filename
    return template_path.read_text(encoding="utf-8")
 def substitute_prompt(
    prompt: str,
    data: dict,
    previous_blocks_json: str | None = None,
 ) -> str:
    """Подставляет в промпт поля главы и при необходимости накопленный JSON."""
    out = (
        prompt.replace("{book_title}", data.get("book_title", ""))
        .replace("{chapter_title}", data.get("chapter_title", ""))
        .replace("{chapter_text}", data.get("chapter_text", ""))
    )
    if previous_blocks_json is not None:
        out = out.replace("{previous_blocks_json}", previous_blocks_json)
    return out
 def extract_json_from_response(text: str) -> dict:
    """Parse the JSON carried by a model reply.

    The stripped reply is first parsed as-is. Only if that fails is the content
    of the first Markdown code fence (```json ... ``` or ``` ... ```) extracted
    and parsed. Trying the whole reply first keeps a valid JSON document intact
    when one of its string values happens to contain triple backticks.

    Args:
        text: Raw reply text.

    Returns:
        The decoded JSON value. It is not checked to be an object here, so the
        caller must validate the type.

    Raises:
        json.JSONDecodeError: if neither the whole reply nor the fenced part
            is valid JSON.
    """
    text = text.strip()
    try:
        return json.loads(text)
    except json.JSONDecodeError:
        fenced = re.search(r"```(?:json)?\s*([\s\S]*?)\s*```", text)
        if fenced is None:
            # Nothing else to try: surface the error for the whole reply.
            raise
    return json.loads(fenced.group(1).strip())
 def call_ollama(prompt: str) -> str:
    """POST ``prompt`` as a single user message to Ollama's /api/chat and return the reply text.

    The request is non-streaming, asks for ``"format": "json"``, passes
    ``OLLAMA_OPTIONS`` and sets ``keep_alive`` to 0. No client-side timeout is
    applied, so the call blocks until the server answers.

    Returns:
        The ``message.content`` string of the response, or "" when it is absent.

    Raises:
        RuntimeError: on an HTTP error status; the message carries the status
            code, the reason and at most the first 1000 bytes of the error body.
    """
    payload = {
        "model": MODEL,
        "messages": [{"role": "user", "content": prompt}],
        "stream": False,
        "format": "json",
        "options": OLLAMA_OPTIONS,
        "keep_alive": 0,
    }
    encoded = json.dumps(payload, ensure_ascii=False).encode("utf-8")
    request = urllib.request.Request(
        f"{OLLAMA_URL}/api/chat",
        data=encoded,
        headers={"Content-Type": "application/json"},
        method="POST",
    )
    try:
        with urllib.request.urlopen(request, timeout=None) as response:
            reply = json.load(response)
        message = reply.get("message", {})
        return message.get("content", "")
    except urllib.error.HTTPError as err:
        error_body = b""
        if err.fp:
            try:
                error_body = err.fp.read()[:1000]
            except Exception:
                # Best effort only: the status line is still reported below.
                pass
        details = error_body.decode("utf-8", errors="replace")
        raise RuntimeError(
            f"Ollama HTTP {err.code}: {err.reason}. Body: {details}"
        ) from err
 def main() -> int:
    """Generate every block listed in ``BLOCKS`` in order and write the merged result.

    The chapter input and all prompt templates are loaded before the first
    model call, so a missing or unreadable file is reported immediately rather
    than after earlier (slow) blocks have already been generated. For each
    block the prompt is filled in (with the JSON accumulated so far when the
    block asks for it), sent to Ollama, parsed, and merged into one dict via
    ``dict.update``. The result is written to ``merge.json`` in ``DIR``.

    Returns:
        0 on success; 1 if loading inputs, calling Ollama, or parsing a reply
        fails, or if a block's reply is not a JSON object.
    """
    print("Загрузка вход_главы.json и промптов...")
    try:
        data = load_input()
        # Fail fast: read every template now instead of lazily inside the loop.
        templates = [load_prompt(prompt_file) for _, prompt_file, _ in BLOCKS]
    except (OSError, ValueError) as e:
        # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
        print(f"Не удалось загрузить входные данные или промпты: {e}", file=sys.stderr)
        return 1
    total = len(BLOCKS)
    merged: dict = {}
    for i, ((block_name, _, use_previous), prompt_tpl) in enumerate(
        zip(BLOCKS, templates), start=1
    ):
        previous_json = (
            json.dumps(merged, ensure_ascii=False, indent=2) if use_previous else None
        )
        prompt = substitute_prompt(prompt_tpl, data, previous_json)
        # The total comes from BLOCKS so the counter stays correct if blocks are added or removed.
        print(f"[{i}/{total}] Блок «{block_name}» — вызов Ollama {MODEL}...")
        t0 = time.monotonic()
        try:
            raw = call_ollama(prompt)
        except Exception as e:
            # Top-level boundary: any failure of the call aborts the run with a message.
            print(f"Ошибка вызова Ollama на блоке «{block_name}»: {e}", file=sys.stderr)
            return 1
        elapsed = time.monotonic() - t0
        print(f"      Ответ за {elapsed:.1f} сек ({elapsed / 60:.1f} мин)")
        try:
            block_data = extract_json_from_response(raw)
        except json.JSONDecodeError as e:
            print(
                f"Не удалось распарсить JSON в блоке «{block_name}»: {e}",
                file=sys.stderr,
            )
            print("Первые 500 символов ответа:", raw[:500], file=sys.stderr)
            return 1
        if not isinstance(block_data, dict):
            print(
                f"Блок «{block_name}» вернул не объект: {type(block_data).__name__}",
                file=sys.stderr,
            )
            return 1
        # Later blocks overwrite keys of earlier ones if the names collide.
        merged.update(block_data)
    out_path = DIR / "merge.json"
    with open(out_path, "w", encoding="utf-8") as f:
        json.dump(merged, f, ensure_ascii=False, indent=2)
    print(f"Записано: {out_path}")
    return 0
 if __name__ == "__main__":
    # Run the pipeline when executed as a script; main()'s return value becomes the exit status.
    raise SystemExit(main())
--- a/1_анализ_главы/run_framework_ollama.py
+++ b/1_анализ_главы/run_framework_ollama.py
@@ -58,6 +58,7 @@ def call_ollama(prompt: str) -> str:
            "stream": False,
            "format": "json",
            "options": OLLAMA_OPTIONS,
            "keep_alive": 0,  # выгрузить модель из памяти сразу после ответа
        },
        ensure_ascii=False,
    ).encode("utf-8")