teljes backend_mentés
This commit is contained in:
236
backend/app/scripts/audit_scanner.py
Normal file
236
backend/app/scripts/audit_scanner.py
Normal file
@@ -0,0 +1,236 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Audit Scanner for Codebase Analysis (#42)
|
||||
|
||||
This script performs a comprehensive audit of the Python codebase:
|
||||
1. Recursively scans the backend/app directory for .py files
|
||||
2. Excludes __init__.py files and alembic/versions directory
|
||||
3. Groups files by directory structure (api, services, models, etc.)
|
||||
4. Extracts docstrings and class/function names from each file
|
||||
5. Generates a Markdown audit ledger with checkboxes for tracking
|
||||
"""
|
||||
|
||||
import ast
import datetime
import os
import re
import sys

from pathlib import Path
from typing import Dict, List, Set, Tuple
|
||||
|
||||
# Project root (relative to script location in container)
|
||||
PROJECT_ROOT = Path("/app")
|
||||
BACKEND_DIR = PROJECT_ROOT / "app" # /app/app is the backend root in container
|
||||
OUTPUT_FILE = Path("/app/.roo/audit_ledger_94.md")
|
||||
|
||||
# Directories to exclude
|
||||
EXCLUDE_DIRS = {"__pycache__", ".git", "alembic/versions", "migrations"}
|
||||
EXCLUDE_FILES = {"__init__.py"}
|
||||
|
||||
def extract_python_info(file_path: Path) -> Tuple[str, List[str], List[str]]:
    """
    Extract the module docstring plus class and top-level function names
    from a Python source file.

    Args:
        file_path: Path to the ``.py`` file to inspect.

    Returns:
        ``(docstring, class_names, function_names)``.  On an unreadable
        file the docstring slot carries an error message and both lists
        are empty.
    """
    try:
        with open(file_path, 'r', encoding='utf-8') as f:
            content = f.read()

        # Try to parse with AST first; fall back to regex on failure.
        try:
            tree = ast.parse(content)

            # Extract module docstring.
            docstring = ast.get_docstring(tree) or ""

            # All classes, anywhere in the module.
            class_names = [
                node.name
                for node in ast.walk(tree)
                if isinstance(node, ast.ClassDef)
            ]

            # Only module-level functions: methods live inside ClassDef
            # bodies, so iterating tree.body skips them.  BUGFIX: the
            # original checked `node.parent`, an attribute ast nodes do
            # not have, so every module containing a function raised
            # AttributeError and was reported as a read error.
            function_names = [
                node.name
                for node in tree.body
                if isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef))
            ]

            return docstring, class_names, function_names

        except (SyntaxError, ValueError):
            # AST parsing failed (e.g. legacy syntax); best-effort regex
            # extraction instead.
            docstring_match = re.search(r'"""(.*?)"""', content, re.DOTALL)
            docstring = docstring_match.group(1).strip() if docstring_match else ""

            # ^class / ^def are anchored at column 0, i.e. top level only.
            class_matches = re.findall(r'^class\s+(\w+)', content, re.MULTILINE)
            func_matches = re.findall(r'^def\s+(\w+)', content, re.MULTILINE)

            return docstring, class_matches, func_matches

    except Exception as e:
        # Deliberate best-effort: never let one unreadable file abort the
        # whole scan -- surface the error in the summary text instead.
        return f"Error reading file: {e}", [], []
|
||||
|
||||
def get_file_summary(docstring: str, class_names: List[str], function_names: List[str]) -> str:
    """Condense extracted file information into one human-readable line."""

    def _listing(label: str, names: List[str]) -> str:
        # Show at most five names; note how many were omitted.
        shown = ", ".join(names[:5])
        overflow = len(names) - 5
        suffix = f" (+{overflow} more)" if overflow > 0 else ""
        return f"{label}: {shown}{suffix}"

    pieces = []

    if docstring:
        # Headline is the docstring's first line, capped at 100 chars.
        headline = docstring.split('\n')[0].strip()
        if len(headline) > 100:
            headline = headline[:97] + "..."
        pieces.append(f'"{headline}"')

    if class_names:
        pieces.append(_listing("Classes", class_names))

    if function_names:
        pieces.append(_listing("Functions", function_names))

    if not pieces:
        return "No docstring or definitions found"
    return " - ".join(pieces)
|
||||
|
||||
def scan_python_files(root_dir: Path) -> Dict[str, List[Tuple[Path, str]]]:
    """
    Recursively collect Python files under *root_dir*, grouped by the
    top-level directory they live in.

    Returns:
        Mapping of category label to a list of
        ``(path relative to root_dir, one-line summary)`` tuples.
    """
    # First path component -> human-readable category label.
    category_by_dir = {
        "api": "API Endpoints",
        "services": "Services",
        "models": "Models",
        "core": "Core",
        "workers": "Workers",
        "scripts": "Scripts",
        "tests": "Tests",
        "tests_internal": "Tests",
        "test_outside": "Tests",
        "crud": "CRUD",
        "schemas": "Schemas",
        "templates": "Templates",
        "static": "Static",
    }

    grouped: Dict[str, List[Tuple[Path, str]]] = {}

    for candidate in root_dir.rglob("*.py"):
        path_text = str(candidate)

        # Substring match so multi-segment entries like "alembic/versions"
        # are honoured as well.
        if any(excluded in path_text for excluded in EXCLUDE_DIRS):
            continue
        if candidate.name in EXCLUDE_FILES:
            continue

        relative = candidate.relative_to(root_dir)
        parts = relative.parts

        # Files sitting directly under root_dir (a single path component)
        # always land in "Other"; deeper files are classified by their
        # first directory.
        category = "Other"
        if len(parts) >= 2:
            category = category_by_dir.get(parts[0], "Other")

        doc, classes, funcs = extract_python_info(candidate)
        summary = get_file_summary(doc, classes, funcs)

        grouped.setdefault(category, []).append((relative, summary))

    return grouped
|
||||
|
||||
def generate_markdown(categories: Dict[str, List[Tuple[Path, str]]]) -> str:
    """
    Render categorized scan results as a Markdown audit ledger.

    Args:
        categories: Mapping of category label to ``(relative path,
            summary)`` tuples, as produced by ``scan_python_files()``.
            The input is left untouched (the original implementation
            sorted the caller's lists in place).

    Returns:
        The complete Markdown document as a single string.
    """
    lines: List[str] = []

    # --- Header ------------------------------------------------------
    lines.append("# Codebase Audit Ledger (#42)")
    lines.append("")
    lines.append(f"*Generated: {datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')}*")
    lines.append(f"*Total files scanned: {sum(len(files) for files in categories.values())}*")
    lines.append("")
    lines.append("## 📋 Audit Checklist")
    lines.append("")
    lines.append("Check each file after audit completion. Use this ledger to track progress.")
    lines.append("")

    # Stable category order so repeated runs diff cleanly.
    sorted_categories = sorted(categories.items(), key=lambda x: x[0])

    # --- Per-category checklists -------------------------------------
    for category, files in sorted_categories:
        # NOTE(review): the directory hint lowercases/underscores the
        # label (e.g. "API Endpoints" -> "api_endpoints"), which may not
        # match the real directory name; kept for output compatibility.
        lines.append(f"## {category} (`backend/app/{category.lower().replace(' ', '_')}/...`)")
        lines.append("")

        # BUGFIX: sorted() instead of files.sort() -- do not mutate the
        # caller's lists as a side effect of rendering.
        for file_path, summary in sorted(files, key=lambda x: str(x[0])):
            lines.append(f"- [ ] `{file_path}` - {summary}")

        lines.append("")

    # --- Statistics table --------------------------------------------
    lines.append("## 📊 Statistics")
    lines.append("")
    lines.append("| Category | File Count |")
    lines.append("|----------|------------|")
    for category, files in sorted_categories:
        lines.append(f"| {category} | {len(files)} |")

    lines.append("")
    lines.append("## 🎯 Next Steps")
    lines.append("")
    lines.append("1. **Review each file** for functionality and dependencies")
    lines.append("2. **Document findings** in individual audit reports")
    lines.append("3. **Identify gaps** in test coverage and documentation")
    lines.append("4. **Prioritize refactoring** based on complexity and criticality")
    lines.append("")
    lines.append("*This ledger is automatically generated by `audit_scanner.py`*")

    return "\n".join(lines)
|
||||
|
||||
def main():
    """Run the scan, write the audit ledger, and return a process exit code."""
    print("🔍 Starting codebase audit scan...")
    print(f"Scanning directory: {BACKEND_DIR}")

    # Bail out early when the expected container layout is missing.
    if not BACKEND_DIR.exists():
        print(f"Error: Directory {BACKEND_DIR} does not exist!")
        return 1

    categories = scan_python_files(BACKEND_DIR)
    markdown_content = generate_markdown(categories)

    # Make sure the output directory exists, then write the ledger.
    OUTPUT_FILE.parent.mkdir(parents=True, exist_ok=True)
    with open(OUTPUT_FILE, 'w', encoding='utf-8') as ledger:
        ledger.write(markdown_content)

    total_files = sum(len(entries) for entries in categories.values())
    print(f"✅ Scan complete! Found {total_files} Python files.")
    print(f"📄 Report generated: {OUTPUT_FILE}")

    # Console summary, one line per category.
    print("\n📊 Category breakdown:")
    for name, entries in sorted(categories.items(), key=lambda item: item[0]):
        print(f"  {name}: {len(entries)} files")

    return 0
|
||||
|
||||
if __name__ == "__main__":
    # sys.exit() is the documented way to set the process exit status;
    # the builtin exit() is injected by the `site` module for interactive
    # sessions and is absent when Python runs with -S.
    sys.exit(main())
|
||||
Reference in New Issue
Block a user