teljes backend_mentés
This commit is contained in:
236
backend/app/scripts/audit_scanner.py
Normal file
236
backend/app/scripts/audit_scanner.py
Normal file
@@ -0,0 +1,236 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Audit Scanner for Codebase Analysis (#42)
|
||||
|
||||
This script performs a comprehensive audit of the Python codebase:
|
||||
1. Recursively scans the backend/app directory for .py files
|
||||
2. Excludes __init__.py files and alembic/versions directory
|
||||
3. Groups files by directory structure (api, services, models, etc.)
|
||||
4. Extracts docstrings and class/function names from each file
|
||||
5. Generates a Markdown audit ledger with checkboxes for tracking
|
||||
"""
|
||||
|
||||
import ast
import datetime
import os
import re
import sys

from pathlib import Path
from typing import Dict, List, Set, Tuple
|
||||
|
||||
# Project root (relative to script location in container)
|
||||
PROJECT_ROOT = Path("/app")
|
||||
BACKEND_DIR = PROJECT_ROOT / "app" # /app/app is the backend root in container
|
||||
OUTPUT_FILE = Path("/app/.roo/audit_ledger_94.md")
|
||||
|
||||
# Directories to exclude
|
||||
EXCLUDE_DIRS = {"__pycache__", ".git", "alembic/versions", "migrations"}
|
||||
EXCLUDE_FILES = {"__init__.py"}
|
||||
|
||||
def extract_python_info(file_path: Path) -> Tuple[str, List[str], List[str]]:
    """
    Extract the module docstring plus class and top-level function names
    from a Python source file.

    Args:
        file_path: Path to the ``.py`` file to inspect.

    Returns:
        ``(docstring, class_names, function_names)``.  On an unreadable
        file the docstring slot carries an error message and both lists
        are empty.
    """
    try:
        with open(file_path, 'r', encoding='utf-8') as f:
            content = f.read()

        # Try to parse with AST first; fall back to regex on failure.
        try:
            tree = ast.parse(content)

            # Extract module docstring.
            docstring = ast.get_docstring(tree) or ""

            # All classes, anywhere in the module.
            class_names = [
                node.name
                for node in ast.walk(tree)
                if isinstance(node, ast.ClassDef)
            ]

            # Only module-level functions: methods live inside ClassDef
            # bodies, so iterating tree.body skips them.  BUGFIX: the
            # original checked `node.parent`, an attribute ast nodes do
            # not have, so every module containing a function raised
            # AttributeError and was reported as a read error.
            function_names = [
                node.name
                for node in tree.body
                if isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef))
            ]

            return docstring, class_names, function_names

        except (SyntaxError, ValueError):
            # AST parsing failed (e.g. legacy syntax); best-effort regex
            # extraction instead.
            docstring_match = re.search(r'"""(.*?)"""', content, re.DOTALL)
            docstring = docstring_match.group(1).strip() if docstring_match else ""

            # ^class / ^def are anchored at column 0, i.e. top level only.
            class_matches = re.findall(r'^class\s+(\w+)', content, re.MULTILINE)
            func_matches = re.findall(r'^def\s+(\w+)', content, re.MULTILINE)

            return docstring, class_matches, func_matches

    except Exception as e:
        # Deliberate best-effort: never let one unreadable file abort the
        # whole scan -- surface the error in the summary text instead.
        return f"Error reading file: {e}", [], []
|
||||
|
||||
def get_file_summary(docstring: str, class_names: List[str], function_names: List[str]) -> str:
    """Condense extracted file information into one human-readable line."""

    def _listing(label: str, names: List[str]) -> str:
        # Show at most five names; note how many were omitted.
        shown = ", ".join(names[:5])
        overflow = len(names) - 5
        suffix = f" (+{overflow} more)" if overflow > 0 else ""
        return f"{label}: {shown}{suffix}"

    pieces = []

    if docstring:
        # Headline is the docstring's first line, capped at 100 chars.
        headline = docstring.split('\n')[0].strip()
        if len(headline) > 100:
            headline = headline[:97] + "..."
        pieces.append(f'"{headline}"')

    if class_names:
        pieces.append(_listing("Classes", class_names))

    if function_names:
        pieces.append(_listing("Functions", function_names))

    if not pieces:
        return "No docstring or definitions found"
    return " - ".join(pieces)
|
||||
|
||||
def scan_python_files(root_dir: Path) -> Dict[str, List[Tuple[Path, str]]]:
    """
    Recursively collect Python files under *root_dir*, grouped by the
    top-level directory they live in.

    Returns:
        Mapping of category label to a list of
        ``(path relative to root_dir, one-line summary)`` tuples.
    """
    # First path component -> human-readable category label.
    category_by_dir = {
        "api": "API Endpoints",
        "services": "Services",
        "models": "Models",
        "core": "Core",
        "workers": "Workers",
        "scripts": "Scripts",
        "tests": "Tests",
        "tests_internal": "Tests",
        "test_outside": "Tests",
        "crud": "CRUD",
        "schemas": "Schemas",
        "templates": "Templates",
        "static": "Static",
    }

    grouped: Dict[str, List[Tuple[Path, str]]] = {}

    for candidate in root_dir.rglob("*.py"):
        path_text = str(candidate)

        # Substring match so multi-segment entries like "alembic/versions"
        # are honoured as well.
        if any(excluded in path_text for excluded in EXCLUDE_DIRS):
            continue
        if candidate.name in EXCLUDE_FILES:
            continue

        relative = candidate.relative_to(root_dir)
        parts = relative.parts

        # Files sitting directly under root_dir (a single path component)
        # always land in "Other"; deeper files are classified by their
        # first directory.
        category = "Other"
        if len(parts) >= 2:
            category = category_by_dir.get(parts[0], "Other")

        doc, classes, funcs = extract_python_info(candidate)
        summary = get_file_summary(doc, classes, funcs)

        grouped.setdefault(category, []).append((relative, summary))

    return grouped
|
||||
|
||||
def generate_markdown(categories: Dict[str, List[Tuple[Path, str]]]) -> str:
    """
    Render categorized scan results as a Markdown audit ledger.

    Args:
        categories: Mapping of category label to ``(relative path,
            summary)`` tuples, as produced by ``scan_python_files()``.
            The input is left untouched (the original implementation
            sorted the caller's lists in place).

    Returns:
        The complete Markdown document as a single string.
    """
    lines: List[str] = []

    # --- Header ------------------------------------------------------
    lines.append("# Codebase Audit Ledger (#42)")
    lines.append("")
    lines.append(f"*Generated: {datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')}*")
    lines.append(f"*Total files scanned: {sum(len(files) for files in categories.values())}*")
    lines.append("")
    lines.append("## 📋 Audit Checklist")
    lines.append("")
    lines.append("Check each file after audit completion. Use this ledger to track progress.")
    lines.append("")

    # Stable category order so repeated runs diff cleanly.
    sorted_categories = sorted(categories.items(), key=lambda x: x[0])

    # --- Per-category checklists -------------------------------------
    for category, files in sorted_categories:
        # NOTE(review): the directory hint lowercases/underscores the
        # label (e.g. "API Endpoints" -> "api_endpoints"), which may not
        # match the real directory name; kept for output compatibility.
        lines.append(f"## {category} (`backend/app/{category.lower().replace(' ', '_')}/...`)")
        lines.append("")

        # BUGFIX: sorted() instead of files.sort() -- do not mutate the
        # caller's lists as a side effect of rendering.
        for file_path, summary in sorted(files, key=lambda x: str(x[0])):
            lines.append(f"- [ ] `{file_path}` - {summary}")

        lines.append("")

    # --- Statistics table --------------------------------------------
    lines.append("## 📊 Statistics")
    lines.append("")
    lines.append("| Category | File Count |")
    lines.append("|----------|------------|")
    for category, files in sorted_categories:
        lines.append(f"| {category} | {len(files)} |")

    lines.append("")
    lines.append("## 🎯 Next Steps")
    lines.append("")
    lines.append("1. **Review each file** for functionality and dependencies")
    lines.append("2. **Document findings** in individual audit reports")
    lines.append("3. **Identify gaps** in test coverage and documentation")
    lines.append("4. **Prioritize refactoring** based on complexity and criticality")
    lines.append("")
    lines.append("*This ledger is automatically generated by `audit_scanner.py`*")

    return "\n".join(lines)
|
||||
|
||||
def main():
    """Run the scan, write the audit ledger, and return a process exit code."""
    print("🔍 Starting codebase audit scan...")
    print(f"Scanning directory: {BACKEND_DIR}")

    # Bail out early when the expected container layout is missing.
    if not BACKEND_DIR.exists():
        print(f"Error: Directory {BACKEND_DIR} does not exist!")
        return 1

    categories = scan_python_files(BACKEND_DIR)
    markdown_content = generate_markdown(categories)

    # Make sure the output directory exists, then write the ledger.
    OUTPUT_FILE.parent.mkdir(parents=True, exist_ok=True)
    with open(OUTPUT_FILE, 'w', encoding='utf-8') as ledger:
        ledger.write(markdown_content)

    total_files = sum(len(entries) for entries in categories.values())
    print(f"✅ Scan complete! Found {total_files} Python files.")
    print(f"📄 Report generated: {OUTPUT_FILE}")

    # Console summary, one line per category.
    print("\n📊 Category breakdown:")
    for name, entries in sorted(categories.items(), key=lambda item: item[0]):
        print(f"  {name}: {len(entries)} files")

    return 0
|
||||
|
||||
if __name__ == "__main__":
    # sys.exit() is the documented way to set the process exit status;
    # the builtin exit() is injected by the `site` module for interactive
    # sessions and is absent when Python runs with -S.
    sys.exit(main())
|
||||
Reference in New Issue
Block a user