init
This commit is contained in:
45
8_сохранение_postgres/README.md
Normal file
45
8_сохранение_postgres/README.md
Normal file
@@ -0,0 +1,45 @@
|
||||
# Сохранение в Postgres (шаг 8)
|
||||
|
||||
Сохранение анализа главы и тегов в реляционную БД: книга, глава, запись в `chapter_analyses`, теги в `tags`, связи в `chapter_tags`. Вход — merged JSON (шаг 5) с метаданными из этапа 1.
|
||||
|
||||
## Схема
|
||||
|
||||
Перед первым запуском применить схему:
|
||||
|
||||
```bash
|
||||
psql -h localhost -U n8n -d n8n -f schema.sql
|
||||
# или через docker: docker exec -i postgres psql -U n8n -d n8n < 8_сохранение_postgres/schema.sql
|
||||
```
|
||||
|
||||
Таблицы: `books`, `chapters`, `chapter_analyses`, `tags`, `chapter_tags` (см. ARCHITECTURE_SUMMARY.md).
|
||||
|
||||
## Вход
|
||||
|
||||
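- **merged JSON** (шаг 5): book_id, chapter_id, chapter_number, chapter_title, book_title, author, framework, insights, application, limitations, tags. Поля book_id, chapter_id, chapter_number и chapter_title обязательны — без них скрипт завершится с ошибкой. Метаданные должны быть подмешаны через `--input-chapter` на шаге 5. Необязательное поле `chapter_text` (если оно подмешано в мерж) сохраняется в `chapters.content`.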
|
||||
|
||||
## Действия
|
||||
|
||||
1. Upsert книги (books) по book_id.
|
||||
2. Upsert главы (chapters) по chapter_id.
|
||||
3. Insert/update анализа в chapter_analyses (analysis_result = framework + insights + application + limitations).
|
||||
4. Get-or-create тегов в tags (name, category); связи в chapter_tags (chapter_id, tag_id, confidence, validated, source='ai_validation').
|
||||
|
||||
## Использование
|
||||
|
||||
```bash
|
||||
pip install -r requirements.txt # один раз
|
||||
python3 save_to_postgres.py 5_мерж_анализа_и_тегов/merged_with_tags.json [--validation-score 0.95]
|
||||
```
|
||||
|
||||
## Переменные окружения
|
||||
|
||||
| Переменная | По умолчанию | Описание |
|
||||
|------------|--------------|----------|
|
||||
| `DATABASE_URL` | — | Полный URL (postgresql://user:pass@host:port/dbname). Если задан, переменные `POSTGRES_*` игнорируются. То же значение можно передать аргументом `--database-url`. |
|
||||
| `POSTGRES_HOST` | localhost | Хост Postgres |
|
||||
| `POSTGRES_PORT` | 5432 | Порт |
|
||||
| `POSTGRES_USER` | n8n | Пользователь |
|
||||
| `POSTGRES_PASSWORD` | n8n_password | Пароль |
|
||||
| `POSTGRES_DB` | n8n | Имя БД |
|
||||
|
||||
В Docker используйте `POSTGRES_HOST=postgres` (имя сервиса).
|
||||
1
8_сохранение_postgres/requirements.txt
Normal file
1
8_сохранение_postgres/requirements.txt
Normal file
@@ -0,0 +1 @@
|
||||
psycopg2-binary>=2.9.9
|
||||
252
8_сохранение_postgres/save_to_postgres.py
Normal file
252
8_сохранение_postgres/save_to_postgres.py
Normal file
@@ -0,0 +1,252 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Шаг 8: сохранение анализа главы и тегов в Postgres.
|
||||
|
||||
Вход: merged JSON (шаг 5) с полями book_id, chapter_id, chapter_number, chapter_title,
|
||||
book_title, author, framework, insights, application, limitations, tags.
|
||||
|
||||
Действия: upsert книги и главы, запись в chapter_analyses, get-or-create тегов в tags,
|
||||
связи в chapter_tags. Ожидается, что схема уже применена (schema.sql).
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import os
|
||||
import sys
|
||||
import uuid
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
try:
|
||||
import psycopg2
|
||||
except ImportError:
|
||||
print("Ошибка: установите psycopg2-binary (pip install psycopg2-binary).", file=sys.stderr)
|
||||
sys.exit(1)
|
||||
|
||||
|
||||
def env(name: str, default: str) -> str:
    """Return environment variable *name*, or *default* when it is unset.

    Leading and trailing whitespace is removed from whichever value is used.
    """
    raw_value = os.environ[name] if name in os.environ else default
    return raw_value.strip()
|
||||
|
||||
|
||||
def get_connection(database_url: str | None = None):
    """Open a psycopg2 connection to Postgres.

    A non-empty *database_url* is handed to ``psycopg2.connect`` as the DSN.
    Otherwise host, port, user, password and database name are read from the
    ``POSTGRES_*`` environment variables, using the defaults listed below.
    """
    if not database_url:
        params = {
            "host": env("POSTGRES_HOST", "localhost"),
            "port": env("POSTGRES_PORT", "5432"),
            "user": env("POSTGRES_USER", "n8n"),
            "password": env("POSTGRES_PASSWORD", "n8n_password"),
            "dbname": env("POSTGRES_DB", "n8n"),
        }
        return psycopg2.connect(**params)
    return psycopg2.connect(database_url)
|
||||
|
||||
|
||||
def upsert_book(conn, book_id: str, title: str | None, author: str | None) -> None:
|
||||
"""Вставка или обновление книги."""
|
||||
with conn.cursor() as cur:
|
||||
cur.execute(
|
||||
"""
|
||||
INSERT INTO books (id, title, author, updated_at)
|
||||
VALUES (%s, %s, %s, NOW())
|
||||
ON CONFLICT (id) DO UPDATE SET
|
||||
title = COALESCE(EXCLUDED.title, books.title),
|
||||
author = COALESCE(EXCLUDED.author, books.author),
|
||||
updated_at = NOW()
|
||||
""",
|
||||
(book_id, title or "", author or ""),
|
||||
)
|
||||
|
||||
|
||||
def upsert_chapter(
|
||||
conn,
|
||||
chapter_id: str,
|
||||
book_id: str,
|
||||
chapter_number: int,
|
||||
chapter_title: str | None,
|
||||
content: str | None = None,
|
||||
) -> None:
|
||||
"""Вставка или обновление главы."""
|
||||
with conn.cursor() as cur:
|
||||
cur.execute(
|
||||
"""
|
||||
INSERT INTO chapters (id, book_id, chapter_number, chapter_title, content)
|
||||
VALUES (%s, %s, %s, %s, %s)
|
||||
ON CONFLICT (id) DO UPDATE SET
|
||||
book_id = EXCLUDED.book_id,
|
||||
chapter_number = EXCLUDED.chapter_number,
|
||||
chapter_title = COALESCE(EXCLUDED.chapter_title, chapters.chapter_title),
|
||||
content = COALESCE(EXCLUDED.content, chapters.content)
|
||||
""",
|
||||
(chapter_id, book_id, chapter_number, chapter_title or "", content),
|
||||
)
|
||||
|
||||
|
||||
def upsert_chapter_analysis(
|
||||
conn,
|
||||
chapter_id: str,
|
||||
analysis_result: dict[str, Any],
|
||||
validation_score: float | None = None,
|
||||
) -> None:
|
||||
"""Вставка или обновление анализа главы."""
|
||||
with conn.cursor() as cur:
|
||||
cur.execute(
|
||||
"""
|
||||
INSERT INTO chapter_analyses (chapter_id, analysis_result, validation_score, validated_at)
|
||||
VALUES (%s, %s, %s, NOW())
|
||||
ON CONFLICT (chapter_id) DO UPDATE SET
|
||||
analysis_result = EXCLUDED.analysis_result,
|
||||
validation_score = COALESCE(EXCLUDED.validation_score, chapter_analyses.validation_score),
|
||||
validated_at = NOW()
|
||||
""",
|
||||
(chapter_id, json.dumps(analysis_result, ensure_ascii=False), validation_score),
|
||||
)
|
||||
|
||||
|
||||
def get_or_create_tag(conn, name: str, category: str) -> str:
    """Return the id of the tag called *name*, creating the tag if needed.

    The lookup is by ``name`` only; *category* is used solely when a new row
    is inserted and is not updated on an existing tag.

    Args:
        conn: Open DB-API connection; the caller is responsible for commit.
        name: Tag name (unique in ``tags``).
        category: Category stored with a newly created tag.

    Returns:
        The tag id as a string.

    Raises:
        RuntimeError: If the tag can neither be inserted nor found afterwards.
    """
    select_sql = "SELECT id FROM tags WHERE name = %s"
    with conn.cursor() as cur:
        cur.execute(select_sql, (name,))
        row = cur.fetchone()
        if row:
            return str(row[0])

        # Another writer may insert the same name between the SELECT above
        # and this INSERT. ON CONFLICT DO NOTHING turns that into "no row
        # returned" instead of a unique-violation that aborts the transaction.
        cur.execute(
            "INSERT INTO tags (id, name, category) VALUES (%s, %s, %s) "
            "ON CONFLICT (name) DO NOTHING RETURNING id",
            (str(uuid.uuid4()), name, category),
        )
        row = cur.fetchone()
        if row is None:
            # Lost the race: the row now exists, so read its id.
            # NOTE(review): relies on the new row being visible to this
            # statement (true under READ COMMITTED) — confirm isolation level.
            cur.execute(select_sql, (name,))
            row = cur.fetchone()
        if row is None:
            raise RuntimeError(f"could not create or find tag {name!r}")
        return str(row[0])
|
||||
|
||||
|
||||
def upsert_chapter_tags(
|
||||
conn,
|
||||
chapter_id: str,
|
||||
tags_flat: list[tuple[str, str, float | None]],
|
||||
) -> None:
|
||||
"""Связывает главу с тегами (get-or-create тегов, вставка в chapter_tags)."""
|
||||
with conn.cursor() as cur:
|
||||
for name, category, confidence in tags_flat:
|
||||
tag_id = get_or_create_tag(conn, name, category)
|
||||
cur.execute(
|
||||
"""
|
||||
INSERT INTO chapter_tags (chapter_id, tag_id, confidence, validated, source)
|
||||
VALUES (%s, %s, %s, true, 'ai_validation')
|
||||
ON CONFLICT (chapter_id, tag_id) DO UPDATE SET
|
||||
confidence = EXCLUDED.confidence,
|
||||
validated = true
|
||||
""",
|
||||
(chapter_id, tag_id, confidence),
|
||||
)
|
||||
|
||||
|
||||
def tags_from_merged(merged: dict[str, Any]) -> list[tuple[str, str, float | None]]:
|
||||
"""(name, category, confidence) из merged JSON."""
|
||||
result: list[tuple[str, str, float | None]] = []
|
||||
for category, items in (merged.get("tags") or {}).items():
|
||||
for item in items if isinstance(items, list) else []:
|
||||
if isinstance(item, dict) and "tag" in item:
|
||||
result.append((
|
||||
item["tag"],
|
||||
category,
|
||||
item.get("confidence"),
|
||||
))
|
||||
return result
|
||||
|
||||
|
||||
def analysis_result_from_merged(merged: dict[str, Any]) -> dict[str, Any]:
    """Pick the fields stored in ``chapter_analyses.analysis_result``.

    Returns a dict with the keys framework, insights, application and
    limitations; a key absent from *merged* maps to ``None``.
    """
    analysis_keys = ("framework", "insights", "application", "limitations")
    return {key: merged.get(key) for key in analysis_keys}
|
||||
|
||||
|
||||
def main() -> int:
    """Command-line entry point: save one merged chapter JSON to Postgres.

    Parses the arguments, loads and validates the merged JSON, then writes
    the book, chapter, analysis and tags in a single transaction that is
    rolled back on any error.

    Returns:
        0 on success, 1 on any failure (the reason is printed to stderr).
    """
    parser = argparse.ArgumentParser(
        description="Шаг 8: сохранить анализ и теги в Postgres (merged JSON → books, chapters, chapter_analyses, tags, chapter_tags).",
    )
    parser.add_argument(
        "merged_json",
        type=Path,
        help="Путь к merged JSON (шаг 5).",
    )
    parser.add_argument(
        "--validation-score",
        type=float,
        default=None,
        help="Оценка валидации (опционально).",
    )
    parser.add_argument(
        "--database-url",
        default=env("DATABASE_URL", ""),
        help="URL подключения к Postgres (или POSTGRES_HOST/USER/PASSWORD/DB).",
    )
    args = parser.parse_args()

    if not args.merged_json.exists():
        print(f"Ошибка: файл не найден: {args.merged_json}", file=sys.stderr)
        return 1

    try:
        with open(args.merged_json, encoding="utf-8") as f:
            merged = json.load(f)
    except json.JSONDecodeError as e:
        print(f"Ошибка разбора JSON: {e}", file=sys.stderr)
        return 1
    except (OSError, UnicodeDecodeError) as e:
        # e.g. the path is a directory, is unreadable, or is not UTF-8.
        print(f"Ошибка чтения файла {args.merged_json}: {e}", file=sys.stderr)
        return 1

    if not isinstance(merged, dict):
        # Valid JSON that is a list/scalar has no fields to read.
        print("Ошибка: merged JSON должен быть объектом верхнего уровня.", file=sys.stderr)
        return 1

    required = ("book_id", "chapter_id", "chapter_number", "chapter_title")
    missing = []
    for key in required:
        value = merged.get(key)
        # 0 is a legitimate chapter number, so for that field only
        # None / "" count as absent; the others must simply be non-empty.
        absent = value in (None, "") if key == "chapter_number" else not value
        if absent:
            missing.append(key)
    if missing:
        print(f"Ошибка: в merged нет полей (нужен мерж с --input-chapter): {missing}", file=sys.stderr)
        return 1

    # Convert before connecting so bad input fails without touching the DB.
    try:
        chapter_number = int(merged["chapter_number"])
    except (TypeError, ValueError):
        print(
            f"Ошибка: chapter_number должен быть целым числом: {merged['chapter_number']!r}",
            file=sys.stderr,
        )
        return 1

    database_url = args.database_url or None
    try:
        conn = get_connection(database_url)
    except Exception as e:
        print(f"Ошибка подключения к Postgres: {e}", file=sys.stderr)
        return 1

    try:
        upsert_book(
            conn,
            merged["book_id"],
            merged.get("book_title"),
            merged.get("author"),
        )
        upsert_chapter(
            conn,
            merged["chapter_id"],
            merged["book_id"],
            chapter_number,
            merged.get("chapter_title"),
            merged.get("chapter_text"),  # present only if it was merged in
        )
        upsert_chapter_analysis(
            conn,
            merged["chapter_id"],
            analysis_result_from_merged(merged),
            args.validation_score,
        )
        upsert_chapter_tags(conn, merged["chapter_id"], tags_from_merged(merged))
        conn.commit()
    except Exception as e:
        # Top-level boundary: undo the partial write and report the cause.
        conn.rollback()
        print(f"Ошибка: {e}", file=sys.stderr)
        return 1
    finally:
        conn.close()

    print(f"Глава {merged['chapter_id']} сохранена в Postgres (books, chapters, chapter_analyses, tags, chapter_tags).", flush=True)
    return 0
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Run the CLI and use its return value as the process exit status.
    raise SystemExit(main())
|
||||
56
8_сохранение_postgres/schema.sql
Normal file
56
8_сохранение_postgres/schema.sql
Normal file
@@ -0,0 +1,56 @@
|
||||
-- Minimal schema for step 8: books, chapters, chapter analyses, tags,
-- and chapter-tag links.
-- Every statement uses IF NOT EXISTS. Apply it before the first run
-- (for example: psql -f schema.sql, or from an initialisation script).

CREATE EXTENSION IF NOT EXISTS "pgcrypto";

CREATE TABLE IF NOT EXISTS books (
    id UUID PRIMARY KEY,
    title VARCHAR(1024),
    author VARCHAR(512),
    metadata JSONB,
    created_at TIMESTAMP WITH TIME ZONE DEFAULT NOW(),
    updated_at TIMESTAMP WITH TIME ZONE DEFAULT NOW()
);

CREATE TABLE IF NOT EXISTS chapters (
    id UUID PRIMARY KEY,
    book_id UUID NOT NULL REFERENCES books(id) ON DELETE CASCADE,
    chapter_number INTEGER NOT NULL,
    chapter_title VARCHAR(1024),
    content TEXT,
    created_at TIMESTAMP WITH TIME ZONE DEFAULT NOW()
);

-- One analysis row per chapter (chapter_id is UNIQUE).
CREATE TABLE IF NOT EXISTS chapter_analyses (
    id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
    chapter_id UUID NOT NULL UNIQUE REFERENCES chapters(id) ON DELETE CASCADE,
    analysis_result JSONB NOT NULL,
    validation_score FLOAT,
    validated_at TIMESTAMP WITH TIME ZONE,
    created_at TIMESTAMP WITH TIME ZONE DEFAULT NOW()
);

-- Tag names are globally unique; category is informational.
CREATE TABLE IF NOT EXISTS tags (
    id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
    name VARCHAR(512) NOT NULL UNIQUE,
    category VARCHAR(64),
    description TEXT,
    created_at TIMESTAMP WITH TIME ZONE DEFAULT NOW()
);

-- At most one link per (chapter, tag) pair.
CREATE TABLE IF NOT EXISTS chapter_tags (
    id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
    chapter_id UUID NOT NULL REFERENCES chapters(id) ON DELETE CASCADE,
    tag_id UUID NOT NULL REFERENCES tags(id) ON DELETE CASCADE,
    confidence FLOAT,
    validated BOOLEAN DEFAULT true,
    source VARCHAR(64) DEFAULT 'ai_validation',
    created_at TIMESTAMP WITH TIME ZONE DEFAULT NOW(),
    UNIQUE (chapter_id, tag_id)
);

CREATE INDEX IF NOT EXISTS idx_chapters_book_id ON chapters(book_id);
-- No separate index on chapter_analyses(chapter_id): the UNIQUE constraint
-- on that column already creates one, so a second index would only add
-- write overhead.
-- NOTE(review): the UNIQUE (chapter_id, tag_id) index can probably serve
-- chapter_id lookups as well; idx_chapter_tags_chapter_id is kept as it was.
CREATE INDEX IF NOT EXISTS idx_chapter_tags_chapter_id ON chapter_tags(chapter_id);
CREATE INDEX IF NOT EXISTS idx_chapter_tags_tag_id ON chapter_tags(tag_id);
CREATE INDEX IF NOT EXISTS idx_tags_category ON tags(category);
|
||||
Reference in New Issue
Block a user