init

2026-02-01 17:01:21 +03:00
commit 9575eaf8ee
144 changed files with 24025 additions and 0 deletions
--- a/epub-parser/batch_statistics.py
+++ b/epub-parser/batch_statistics.py
@@ -0,0 +1,202 @@
+#!/usr/bin/env python3
+"""
+Скрипт для пакетной обработки EPUB файлов из каталога и получения статистики.
+
+Использование:
+    python batch_statistics.py <путь_к_каталогу_с_epub> <путь_к_выходному_каталогу>
+    python batch_statistics.py /path/to/books /path/to/statistics
+"""
+import sys
+import json
+import argparse
+from pathlib import Path
+from typing import List, Dict, Any
+from ebooklib import epub
+
+# Импортируем функции из app.py и statistics.py
+from app import parse_epub_content, calculate_chapter_tokens
+from statistics import format_statistics, sanitize_filename
+
+
+def process_epub_file(epub_path: Path, output_dir: Path) -> Dict[str, Any]:
+    """Parse one EPUB file and write its statistics as a JSON file.
+
+    Args:
+        epub_path: Path to the EPUB file to read.
+        output_dir: Directory that receives ``<title>_statistics.json``;
+            it is created when missing.
+
+    Returns:
+        A report dict with the keys ``epub_file``, ``success``,
+        ``output_file`` and ``error``. On success it additionally carries
+        ``book_title`` and ``book_author``. On failure ``success`` stays
+        ``False`` and ``error`` holds the exception message.
+    """
+    report: Dict[str, Any] = dict(
+        epub_file=str(epub_path),
+        success=False,
+        output_file=None,
+        error=None,
+    )
+
+    try:
+        # Load the book and pull out the pieces the statistics are built from.
+        title, author, _metadata, chapters = parse_epub_content(
+            epub.read_epub(str(epub_path))
+        )
+        stats = format_statistics(title, author, chapters)
+
+        # The output file name is derived from the sanitized book title.
+        target = output_dir / f"{sanitize_filename(title)}_statistics.json"
+
+        output_dir.mkdir(parents=True, exist_ok=True)
+        with open(target, 'w', encoding='utf-8') as out:
+            json.dump(stats, out, ensure_ascii=False, indent=2)
+    except Exception as exc:
+        # Batch boundary: a broken book is reported to the caller through the
+        # result dict instead of aborting the whole run.
+        report['error'] = str(exc)
+        return report
+
+    report.update(
+        success=True,
+        output_file=str(target),
+        book_title=title,
+        book_author=author,
+    )
+    return report
+
+
+def find_epub_files(directory: Path) -> List[Path]:
+    """Return the EPUB files found at ``directory``, sorted by path.
+
+    Args:
+        directory: Directory to scan (non-recursively), or a single file.
+
+    Returns:
+        Sorted list of paths whose name ends with ``.epub`` in any letter
+        case. Empty when ``directory`` does not exist, is a non-EPUB file,
+        or contains no EPUB entries.
+    """
+    if not directory.exists():
+        return []
+
+    if directory.is_file():
+        return [directory] if directory.suffix.lower() == '.epub' else []
+
+    # One pass with a case-insensitive check replaces the former pair of
+    # globs ('*.epub' plus '*.EPUB'): those returned every file twice where
+    # glob matching is case-insensitive (e.g. Windows) and skipped mixed-case
+    # names such as 'Book.Epub' where it is case-sensitive.
+    return sorted(
+        entry for entry in directory.iterdir()
+        if entry.name.lower().endswith('.epub')
+    )
+
+
+def main():
+    """Command-line entry point: process every EPUB found under a path.
+
+    Parses the arguments, calls ``process_epub_file`` for each file returned
+    by ``find_epub_files`` while printing progress, prints the totals and,
+    when ``--summary`` is given, writes a JSON summary of all per-file
+    results.
+
+    Exits with status 1 when the input path does not exist and with status 0
+    when no EPUB files are found. Individual file failures are only counted
+    and reported; they do not change the exit status.
+    """
+    cli = argparse.ArgumentParser(
+        description='Пакетная обработка EPUB файлов для получения статистики',
+        formatter_class=argparse.RawDescriptionHelpFormatter,
+        epilog="""
+Примеры использования:
+  python batch_statistics.py /path/to/books /path/to/statistics
+  python batch_statistics.py ./books ./statistics
+        """
+    )
+    cli.add_argument('books_dir', type=str,
+                     help='Путь к каталогу с EPUB файлами')
+    cli.add_argument('output_dir', type=str,
+                     help='Путь к каталогу для сохранения результатов')
+    cli.add_argument('--summary', type=str, default=None,
+                     help='Путь к файлу для сохранения сводки обработки (JSON)')
+    cli.add_argument('--verbose', action='store_true',
+                     help='Выводить подробную информацию о процессе обработки')
+    options = cli.parse_args()
+
+    # The input path must exist before anything is created on disk.
+    source = Path(options.books_dir)
+    if not source.exists():
+        print(f"Ошибка: Каталог '{source}' не найден.", file=sys.stderr)
+        sys.exit(1)
+
+    destination = Path(options.output_dir)
+    destination.mkdir(parents=True, exist_ok=True)
+
+    books = find_epub_files(source)
+    if not books:
+        print(f"Предупреждение: В каталоге '{source}' не найдено EPUB файлов.", file=sys.stderr)
+        sys.exit(0)
+
+    total = len(books)
+    separator = "-" * 80
+    print(f"Найдено EPUB файлов: {total}")
+    print(f"Выходной каталог: {destination}")
+    print(separator)
+
+    results: List[Dict[str, Any]] = []
+    for index, book in enumerate(books, 1):
+        if options.verbose:
+            print(f"[{index}/{total}] Обработка: {book.name}")
+        else:
+            # Keep the cursor on the same line so the status mark follows it.
+            print(f"[{index}/{total}] {book.name}...", end=' ', flush=True)
+
+        outcome = process_epub_file(book, destination)
+        results.append(outcome)
+
+        if not options.verbose:
+            print("✓" if outcome['success'] else f"✗ ({outcome['error']})")
+            continue
+
+        if outcome['success']:
+            print(f"  ✓ Успешно: {outcome['output_file']}")
+            print(f"    Название: {outcome.get('book_title', 'N/A')}")
+            print(f"    Автор: {outcome.get('book_author', 'N/A')}")
+        else:
+            print(f"  ✗ Ошибка: {outcome['error']}")
+
+    # Every result is either a success or a failure, so one count suffices.
+    successful = sum(1 for outcome in results if outcome['success'])
+    failed = total - successful
+
+    print(separator)
+    print("Обработка завершена:")
+    print(f"  Успешно: {successful}")
+    print(f"  Ошибок: {failed}")
+    print(f"  Всего: {total}")
+
+    if not options.summary:
+        return
+
+    # Optional machine-readable summary of the whole run.
+    summary_file = Path(options.summary)
+    summary_file.parent.mkdir(parents=True, exist_ok=True)
+    payload = {
+        'total_files': total,
+        'successful': successful,
+        'failed': failed,
+        'output_directory': str(destination),
+        'results': results
+    }
+    with open(summary_file, 'w', encoding='utf-8') as out:
+        json.dump(payload, out, ensure_ascii=False, indent=2)
+    print(f"\nСводка сохранена в: {summary_file}")
+
+
+# Run the batch job only when executed as a script, not when imported.
+if __name__ == '__main__':
+    main()