Files
service-finder/backend/app/scripts/check_robots_integrity.py
2026-03-22 11:02:05 +00:00

439 lines
17 KiB
Python
Raw Permalink Blame History

This file contains invisible Unicode characters
This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
#!/usr/bin/env python3
"""
Robot Health & Integrity Audit Script - Recursive Deep Integrity Audit
Ez a szkript automatikusan diagnosztizálja az összes robotunk (Scout, Enricher, Validator, Auditor)
üzembiztonságát rekurzív felfedezéssel. A következő ellenőrzéseket végzi el:
1. Auto-Discovery: Rekurzívan bejárja a `backend/app/workers/` teljes könyvtárszerkezetét
2. Identification: Minden `.py` fájlt, ami nem `__init__.py` és nem segédfájl, kezel robotként/worker-ként
3. Deep Import Test: Megpróbálja importálni mindet, különös figyelemmel a kritikus modulokra
4. Model Sync 2.0: Ellenőrzi, hogy az összes robot a helyes modelleket használja-e
5. Interface Standardizálás: Ellenőrzi a `run()` metódus jelenlétét
6. Kategorizált jelentés: Service, Vehicle General, Vehicle Special, System & OCR kategóriák
"""
import sys
import importlib
import inspect
import asyncio
from pathlib import Path
from typing import List, Dict, Any, Tuple
import logging
import re
# Configure logging for the whole audit run: INFO level, with timestamp,
# level and logger name in front of every message.
logging.basicConfig(level=logging.INFO, format='%(asctime)s [%(levelname)s] %(name)s: %(message)s')
# Logger used by every function in this script.
logger = logging.getLogger("Robot-Integrity-Audit")
# Directory scanned for worker modules: the "workers" folder inside the
# directory two levels above this file (this script's parent's parent).
WORKERS_ROOT = Path(__file__).parent.parent / "workers"
# Exclusion patterns for files under WORKERS_ROOT that must not be audited as
# robots. Entries are compared against the file name only, not against the
# directory part of the path.
EXCLUDE_PATTERNS = [
"__init__.py",
"__pycache__",
".pyc",
"test_",
"mapping_",
"config",
"dictionary",
"rules",
"report",
"monitor_",
"py_to_database",
"README",
# Files with dots in name (not valid Python module names).
# discover_robot_files() additionally skips any file name containing more
# than one dot via an explicit dot count.
r".*\..*\.py", # Matches files like "something.1.0.py"
]
# Categorization patterns: report category -> regexes that are searched
# (case-insensitively) in a worker file's path relative to the app directory.
# Categories are tried in the order written here and the first one with a
# matching regex wins; files matching nothing are reported as "Uncategorized".
CATEGORY_PATTERNS = {
"Service Robots": [
r"service_robot_\d+",
r"service/.*\.py$",
],
"Vehicle General": [
r"vehicle_robot_[0-4]_.*",
r"R[0-4]_.*\.py$",
# NOTE(review): the two entries below are already covered by
# "vehicle_robot_[0-4]_.*" above; they appear to be kept as documentation
# of the specific data sources.
r"vehicle_robot_1_[245]_.*", # NHTSA, Heavy EU, GB
r"vehicle_robot_2_.*", # RDW, AutoData
],
"Vehicle Special": [
r"bike_.*\.py$",
r"vehicle_ultimate_.*\.py$",
r"ultimatespecs/.*\.py$",
],
"System & OCR": [
r"system_.*\.py$",
r"subscription_.*\.py$",
r"ocr/.*\.py$",
],
}
# Characters whose presence marks an EXCLUDE_PATTERNS entry as a regular
# expression instead of a plain substring. '.' is deliberately not listed:
# entries such as ".pyc" or "__init__.py" are plain substrings.
_REGEX_META_CHARS = frozenset("\\*+?[](){}^$|")


def _is_excluded(file_name: str) -> bool:
    """Return True when *file_name* must not be treated as a robot module."""
    for pattern in EXCLUDE_PATTERNS:
        if _REGEX_META_CHARS.intersection(pattern):
            # Regex entry: must match from the start of the file name.
            if re.match(pattern, file_name):
                return True
        elif pattern in file_name:
            return True
    # More than one dot (e.g. "something.1.0.py") cannot be a module name.
    return file_name.count('.') > 1


def _categorize(rel_path: Path) -> str:
    """Return the first CATEGORY_PATTERNS category matching *rel_path*."""
    # The category regexes use "/" as separator, so always match against the
    # POSIX form of the path; str(rel_path) would use "\\" on Windows.
    posix_path = rel_path.as_posix()
    for cat_name, patterns in CATEGORY_PATTERNS.items():
        if any(re.search(p, posix_path, re.IGNORECASE) for p in patterns):
            return cat_name
    return "Uncategorized"


def discover_robot_files() -> List[Tuple[str, Path, str]]:
    """
    Recursively discover all robot files in the workers directory.

    Every ``*.py`` file below ``WORKERS_ROOT`` is considered, except files
    whose name matches an ``EXCLUDE_PATTERNS`` entry or contains more than
    one dot.

    Returns:
        A list of ``(module_name, file_path, category)`` tuples sorted by
        category and then module name. ``module_name`` is the dotted import
        path prefixed with ``app.``; ``category`` is a key of
        ``CATEGORY_PATTERNS`` or ``"Uncategorized"``.
    """
    # Module names are computed relative to the directory two levels above
    # this file (the parent of WORKERS_ROOT).
    app_dir = Path(__file__).parent.parent
    robot_files = []
    for py_file in WORKERS_ROOT.rglob("*.py"):
        if _is_excluded(py_file.name):
            continue
        # rglob may also yield directories whose name ends in ".py".
        if not py_file.is_file():
            continue
        try:
            rel_path = py_file.relative_to(app_dir)
        except ValueError as e:
            logger.warning(f"Could not determine module for {py_file}: {e}")
            continue
        module_parts = []
        for part in rel_path.parts:
            if part.endswith('.py'):
                part = part[:-3]  # Remove .py
            # A dot inside a path component would be read as a package
            # separator, so it is replaced (e.g. "1.0" -> "1_0").
            module_parts.append(part.replace('.', '_'))
        # Add the 'app' prefix: rel_path is relative to the app directory.
        module_name = "app." + ".".join(module_parts)
        robot_files.append((module_name, py_file, _categorize(rel_path)))
    # Sort by category and module name
    robot_files.sort(key=lambda x: (x[2], x[0]))
    return robot_files
async def test_import(module_name: str) -> Tuple[bool, str]:
    """
    Import *module_name* and report whether that worked.

    Returns:
        ``(True, "")`` when the import succeeds, otherwise ``(False, message)``
        where *message* starts with ``ImportError:``, ``SyntaxError at line``
        or ``Exception:`` depending on what the import raised.
    """
    failure = ""
    try:
        importlib.import_module(module_name)
        logger.info(f"{module_name} import successful")
    except ImportError as exc:
        failure = f"ImportError: {exc}"
        logger.error(f"{module_name} import failed: {exc}")
    except SyntaxError as exc:
        failure = f"SyntaxError at line {exc.lineno}: {exc.msg}"
        logger.error(f"{module_name} syntax error: {exc}")
    except Exception as exc:
        # Anything else raised by the module's top-level code.
        failure = f"Exception: {type(exc).__name__}: {exc}"
        logger.error(f"{module_name} import failed: {exc}")
    return failure == "", failure
async def check_model_sync(module_name: str) -> List[str]:
    """
    Scan the classes visible in *module_name* for outdated model names.

    The source of every public class returned by ``inspect.getmembers`` is
    searched for a fixed set of legacy identifiers.

    Returns:
        One warning string per (class, legacy identifier) hit; an empty list
        when nothing is found or when the module cannot be imported.
    """
    # Identifiers that should no longer appear in robot code.
    legacy_names = (
        r"VehicleModelDefinitions",  # Plural mistake
        r"vehicle_model_definitions",  # Old table name
        r"ExternalReferenceQueues",  # Plural mistake
    )
    findings: List[str] = []
    try:
        module = importlib.import_module(module_name)
        for name, cls in inspect.getmembers(module, inspect.isclass):
            if name.startswith('_'):
                continue
            try:
                source = inspect.getsource(cls)
            except (OSError, TypeError):
                # No Python source available (built-in or C extension class).
                continue
            findings.extend(
                f"⚠️ {module_name}.{cls.__name__} uses old pattern: {pattern}"
                for pattern in legacy_names
                if re.search(pattern, source)
            )
    except Exception:
        # Deliberately silent: import failures are reported by the import test.
        pass
    return findings
def _needs_constructor_args(cls: type) -> bool:
    """Return True when calling ``cls()`` would lack a required argument."""
    try:
        signature = inspect.signature(cls)
    except (TypeError, ValueError):
        # No introspectable signature: let the caller try to instantiate.
        return False
    required_kinds = (
        inspect.Parameter.POSITIONAL_ONLY,
        inspect.Parameter.POSITIONAL_OR_KEYWORD,
        inspect.Parameter.KEYWORD_ONLY,
    )
    # Parameters with defaults and *args/**kwargs do not block a bare call.
    return any(
        param.default is inspect.Parameter.empty and param.kind in required_kinds
        for param in signature.parameters.values()
    )


async def test_robot_interface(module_name: str) -> Tuple[bool, List[str]]:
    """
    Test if a robot has a proper interface (run/execute/process method).

    The main class is chosen heuristically: the first public class whose name
    contains ``Robot`` or is contained in the module name, preferring classes
    defined in the module itself over imported ones; otherwise the first
    candidate class.

    Returns:
        ``(interface_ok, notes)``. ``interface_ok`` is True when the main
        class has a ``run``, ``execute`` or ``process`` attribute. ``notes``
        holds both problems and informational lines about the instantiation
        attempt. The outcome of that attempt never changes ``interface_ok``.
    """
    interface_issues: List[str] = []
    try:
        module = importlib.import_module(module_name)
        classes = [cls for name, cls in inspect.getmembers(module, inspect.isclass)
                   if not name.startswith('_')]
        if not classes:
            interface_issues.append("No classes found")
            return False, interface_issues

        # getmembers also returns imported classes (sorted by name), so an
        # imported base class could shadow the module's own robot class.
        own_classes = [cls for cls in classes
                       if getattr(cls, '__module__', None) == module.__name__]
        candidates = own_classes or classes
        flat_module_name = module_name.lower().replace('_', '')
        main_class = next(
            (cls for cls in candidates
             if 'Robot' in cls.__name__
             or cls.__name__.lower().replace('_', '') in flat_module_name),
            candidates[0],  # Fallback to first class
        )

        # Check for run/execute/process method (classmethod or instance method)
        has_run_method = hasattr(main_class, 'run')
        has_execute_method = hasattr(main_class, 'execute')
        has_process_method = hasattr(main_class, 'process')
        interface_ok = has_run_method or has_execute_method or has_process_method
        if not interface_ok:
            interface_issues.append(f"No run/execute/process method in {main_class.__name__}")
        elif has_run_method:
            run_method = getattr(main_class, 'run')
            # A classmethod accessed on the class is bound to the class itself.
            if inspect.ismethod(run_method) and run_method.__self__ is main_class:
                logger.debug(f"{module_name}.{main_class.__name__}.run is classmethod")
            elif inspect.iscoroutinefunction(run_method):
                logger.debug(f"{module_name}.{main_class.__name__}.run is async")
            else:
                logger.debug(f" {module_name}.{main_class.__name__}.run is sync")

        # Instantiate only when no required constructor argument is missing.
        if _needs_constructor_args(main_class):
            interface_issues.append(f"Instantiation requires arguments, skipping")
        else:
            try:
                main_class()
            except Exception as e:
                # Constructors may need a DB, config, etc.; this is only noted.
                interface_issues.append(f"Instantiation failed (expected): {e}")
            else:
                interface_issues.append(f"Instantiation successful")
        return interface_ok, interface_issues
    except Exception as e:
        interface_issues.append(f"Interface test error: {e}")
        return False, interface_issues
async def check_syntax_errors(file_path: Path) -> List[str]:
    """
    Check for syntax errors by attempting to compile the file.

    The file is read as raw bytes and handed to ``compile`` unchanged, so a
    UTF-8 byte order mark or a PEP 263 coding declaration is interpreted the
    same way as when the file is imported. Nothing is executed.

    Args:
        file_path: Path of the Python source file to check.

    Returns:
        An empty list when the file compiles; otherwise a single message that
        starts with ``Syntax error at line`` (for ``SyntaxError``) or
        ``Compilation error:`` (anything else, e.g. an unreadable file).
    """
    errors: List[str] = []
    try:
        # Bytes, not text: decoding as UTF-8 first would keep a leading BOM in
        # the string and would fail on files declaring another encoding.
        with open(file_path, 'rb') as f:
            source = f.read()
        compile(source, str(file_path), 'exec')
    except SyntaxError as e:
        errors.append(f"Syntax error at line {e.lineno}: {e.msg}")
    except Exception as e:
        errors.append(f"Compilation error: {e}")
    return errors
async def generate_categorized_report(results: Dict) -> str:
    """
    Render the audit results as a Markdown report grouped by category.

    Args:
        results: Dict with the keys ``total_robots``, ``robots``,
            ``import_success``, ``syntax_clean`` and ``interface_ok``. Each
            entry of ``results['robots']`` is a dict providing ``module``,
            ``category``, ``import_success``, ``import_error``,
            ``syntax_errors`` and ``interface_ok``.

    Returns:
        The report text: a header, one section per non-empty category, a
        summary, and a list of modules whose import failed (if any).
    """
    timestamp = importlib.import_module('datetime').datetime.now().isoformat()
    total = results['total_robots']
    out = [
        "# 🤖 Robot Integrity Audit Report",
        f"Generated: {timestamp}",
        f"Total robots discovered: {total}",
        "",
    ]

    # Sections appear in this fixed order; empty categories are left out.
    section_order = ("Service Robots", "Vehicle General", "Vehicle Special",
                     "System & OCR", "Uncategorized")
    for category in section_order:
        members = [r for r in results['robots'] if r['category'] == category]
        if not members:
            continue
        size = len(members)
        imported = len([r for r in members if r['import_success']])
        clean = len([r for r in members if not r['syntax_errors']])
        usable = len([r for r in members if r['interface_ok']])
        out.extend([
            f"## {category}",
            f"**Count:** {size}",
            f"- Import successful: {imported}/{size}",
            f"- Syntax clean: {clean}/{size}",
            f"- Interface OK: {usable}/{size}",
        ])

        # A robot is listed when any of the three checks failed.
        flagged = [
            r for r in members
            if not r['import_success'] or r['syntax_errors'] or not r['interface_ok']
        ]
        if flagged:
            out.append("\n**Problematic robots:**")
            for robot in flagged:
                labels = []
                if not robot['import_success']:
                    labels.append("Import failed")
                if robot['syntax_errors']:
                    labels.append(f"Syntax errors ({len(robot['syntax_errors'])})")
                if not robot['interface_ok']:
                    labels.append("Interface issues")
                out.append(f"- `{robot['module']}`: {', '.join(labels)}")
        out.append("")

    out.append("## 📊 Summary")
    out.append(f"- **Total robots:** {results['total_robots']}")
    out.append(f"- **Import successful:** {results['import_success']}/{results['total_robots']}")
    out.append(f"- **Syntax clean:** {results['syntax_clean']}/{results['total_robots']}")
    out.append(f"- **Interface OK:** {results['interface_ok']}/{results['total_robots']}")

    # Import failures are repeated at the end together with their error text.
    failed_imports = [r for r in results['robots'] if not r['import_success']]
    if failed_imports:
        out.append("\n## 🚨 Critical Issues (Import Failed)")
        out.extend(f"- `{r['module']}`: {r['import_error']}" for r in failed_imports)
    return "\n".join(out)
async def main() -> bool:
    """
    Main audit function with recursive discovery.

    Discovers the robot files, runs the syntax, import, interface and model
    checks on each of them, logs a summary and writes the Markdown report to
    ``audit_report_robots.md`` three directory levels above this file.

    Returns:
        False when no robot file is found or at least one import failed;
        True otherwise (syntax or interface problems only cause a warning).
    """
    logger.info("🤖 Starting Recursive Deep Integrity Audit")
    logger.info("=" * 60)

    # Discover all robot files
    logger.info("\n🔍 STEP 1: Discovering robot files...")
    robot_files = discover_robot_files()
    if not robot_files:
        logger.error("❌ No robot files found!")
        return False
    logger.info(f"📁 Found {len(robot_files)} robot files")

    results = {
        'robots': [],
        'total_robots': len(robot_files),
        'import_success': 0,
        'syntax_clean': 0,
        'interface_ok': 0,
    }

    # Process each robot
    logger.info("\n📦 STEP 2: Import and syntax tests...")
    logger.info("-" * 40)
    for i, (module_name, file_path, category) in enumerate(robot_files, 1):
        logger.info(f"\n[{i}/{len(robot_files)}] Testing: {module_name} ({category})")
        # Check syntax first
        syntax_errors = await check_syntax_errors(file_path)
        # Test import
        import_success, import_error = await test_import(module_name)
        # Test interface
        interface_ok, interface_issues = await test_robot_interface(module_name)
        # Check model sync
        model_errors = await check_model_sync(module_name)

        results['robots'].append({
            'module': module_name,
            'file': str(file_path),
            'category': category,
            'import_success': import_success,
            'import_error': import_error,
            'syntax_errors': syntax_errors,
            'interface_ok': interface_ok,
            'interface_issues': interface_issues,
            'model_errors': model_errors,
        })
        if import_success:
            results['import_success'] += 1
        if not syntax_errors:
            results['syntax_clean'] += 1
        if interface_ok:
            results['interface_ok'] += 1

        # Log summary for this robot
        # NOTE(review): both alternatives are empty strings as written, so the
        # prefix carries no information - confirm the intended status glyphs.
        status_symbol = "" if import_success and not syntax_errors else ""
        logger.info(f"{status_symbol} {module_name}: Import={import_success}, Syntax={len(syntax_errors)} errors, Interface={interface_ok}")

    # Generate report
    logger.info("\n📊 STEP 3: Generating categorized report...")
    report = await generate_categorized_report(results)

    # Print summary to console
    logger.info("\n" + "=" * 60)
    logger.info("📊 AUDIT SUMMARY")
    logger.info("=" * 60)
    logger.info(f"Total robots discovered: {results['total_robots']}")
    logger.info(f"Import successful: {results['import_success']}/{results['total_robots']}")
    logger.info(f"Syntax clean: {results['syntax_clean']}/{results['total_robots']}")
    logger.info(f"Interface OK: {results['interface_ok']}/{results['total_robots']}")

    # Save report to file
    report_path = Path(__file__).parent.parent.parent / "audit_report_robots.md"
    with open(report_path, 'w', encoding='utf-8') as f:
        f.write(report)
    logger.info(f"\n📄 Full report saved to: {report_path}")

    # Determine overall status: only failed imports are critical.
    critical_count = sum(1 for r in results['robots'] if not r['import_success'])
    if critical_count > 0:
        logger.error(f"🚨 ROBOT INTEGRITY CHECK FAILED - {critical_count} critical issues found!")
        return False
    # Every import succeeded here, so warnings can only come from the syntax
    # and interface checks (comparing the import counter again would make this
    # branch unreachable).
    has_warnings = (
        results['syntax_clean'] < results['total_robots']
        or results['interface_ok'] < results['total_robots']
    )
    if has_warnings:
        logger.warning("⚠️ ROBOT INTEGRITY CHECK PASSED with warnings")
        return True
    logger.info("✅ ROBOT INTEGRITY CHECK PASSED - All systems operational!")
    return True
if __name__ == "__main__":
    # Exit status 0 when the audit passed, 1 otherwise.
    exit_code = 0 if asyncio.run(main()) else 1
    sys.exit(exit_code)