init

2026-02-01 17:01:21 +03:00
commit 9575eaf8ee
144 changed files with 24025 additions and 0 deletions
--- a/1_анализ_главы/run_framework_ollama.py
+++ b/1_анализ_главы/run_framework_ollama.py
@@ -0,0 +1,119 @@
+#!/usr/bin/env python3
+"""
+Один запуск блока framework через Ollama (qwen3:14b) для сравнения с эталоном выход_frame.json.
+Вход: вход_главы.json, промпт extract_framework_v2.txt.
+Выход: выход_frame_ollama.json и краткое сравнение с выход_frame.json.
+"""
+
+import json
+import re
+import sys
+import time
+import urllib.request
+from pathlib import Path
+
+# Base URL of the local Ollama HTTP server that call_ollama() posts to.
+OLLAMA_URL = "http://localhost:11434"
+# Model tag sent in the "model" field of the chat request.
+MODEL = "qwen3:14b"
+# Directory containing this script; input, prompt and output files are
+# resolved relative to it.
+DIR = Path(__file__).resolve().parent
+
+
+def load_input() -> dict:
+    with open(DIR / "вход_главы.json", encoding="utf-8") as f:
+        return json.load(f)
+
+
+def load_prompt() -> str:
+    with open(DIR / "extract_framework_v2.txt", encoding="utf-8") as f:
+        return f.read()
+
+
+def substitute_prompt(prompt: str, data: dict) -> str:
+    return prompt.replace("{book_title}", data.get("book_title", "")).replace(
+        "{chapter_title}", data.get("chapter_title", "")
+    ).replace("{chapter_text}", data.get("chapter_text", ""))
+
+
+def extract_json_from_response(text: str) -> dict:
+    """Достаёт JSON из ответа модели (может быть обёрнут в ```json ... ```)."""
+    text = text.strip()
+    match = re.search(r"```(?:json)?\s*([\s\S]*?)\s*```", text)
+    if match:
+        text = match.group(1).strip()
+    return json.loads(text)
+
+
+def call_ollama(prompt: str) -> str:
+    body = json.dumps(
+        {
+            "model": MODEL,
+            "messages": [{"role": "user", "content": prompt}],
+            "stream": False,
+            "format": "json",
+        },
+        ensure_ascii=False,
+    ).encode("utf-8")
+    req = urllib.request.Request(
+        f"{OLLAMA_URL}/api/chat",
+        data=body,
+        headers={"Content-Type": "application/json"},
+        method="POST",
+    )
+    try:
+        # Таймаут отключён — ждём завершения генерации (может быть 10+ минут на CPU)
+        with urllib.request.urlopen(req, timeout=None) as resp:
+            data = json.load(resp)
+        return data.get("message", {}).get("content", "")
+    except urllib.error.HTTPError as e:
+        body = ""
+        if e.fp:
+            try:
+                body = e.fp.read().decode("utf-8", errors="replace")[:1000]
+            except Exception:
+                pass
+        raise RuntimeError(f"Ollama HTTP {e.code}: {e.reason}. Body: {body}") from e
+
+
+def main() -> int:
+    print("Загрузка вход_главы.json и промпта...")
+    inp = load_input()
+    prompt_tpl = load_prompt()
+    prompt = substitute_prompt(prompt_tpl, inp)
+    print(f"Вызов Ollama {MODEL} (таймаут отключён, ждём завершения)...")
+    t0 = time.monotonic()
+    try:
+        raw = call_ollama(prompt)
+    except Exception as e:
+        print(f"Ошибка вызова Ollama: {e}", file=sys.stderr)
+        return 1
+    elapsed = time.monotonic() - t0
+    print(f"Ответ получен за {elapsed:.1f} сек ({elapsed / 60:.1f} мин)")
+    try:
+        result = extract_json_from_response(raw)
+    except json.JSONDecodeError as e:
+        print(f"Не удалось распарсить JSON из ответа: {e}", file=sys.stderr)
+        print("Первые 500 символов ответа:", raw[:500], file=sys.stderr)
+        return 1
+    out_path = DIR / "выход_frame_ollama.json"
+    with open(out_path, "w", encoding="utf-8") as f:
+        json.dump(result, f, ensure_ascii=False, indent=2)
+    print(f"Результат сохранён: {out_path}")
+
+    # Сравнение с эталоном
+    ref_path = DIR / "выход_frame.json"
+    if ref_path.exists():
+        with open(ref_path, encoding="utf-8") as f:
+            ref = json.load(f)
+        fw_ollama = result.get("framework", {})
+        fw_ref = ref.get("framework", {})
+        terms_o = len(fw_ollama.get("terms", {}))
+        terms_r = len(fw_ref.get("terms", {}))
+        principles_o = len(fw_ollama.get("principles", []))
+        principles_r = len(fw_ref.get("principles", []))
+        print("\n--- Сравнение с эталоном выход_frame.json ---")
+        print(f"  principles: эталон {principles_r}, Ollama {principles_o}")
+        print(f"  terms:      эталон {terms_r}, Ollama {terms_o}")
+    return 0
+
+
+if __name__ == "__main__":
+    sys.exit(main())