import asyncio
import logging
import os

from PIL import Image
from sqlalchemy import select, update

from app.db.session import SessionLocal
from app.models.document import Document  # assumed module path for the Document model
from app.models.identity import User
from app.services.ai_service import AIService

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger("Robot-OCR-V3")

# Root directory of the per-user archive; overridable via NAS_STORAGE_PATH.
NAS_BASE_PATH = os.getenv("NAS_STORAGE_PATH", "/mnt/nas/user_vault")


class OCRRobot:
    """Worker that OCRs queued documents and archives their images as JPEG."""

    @classmethod
    async def process_queue(cls) -> None:
        """Process up to 10 documents that are waiting for OCR.

        Selects documents with status ``"pending_ocr"`` whose owner has a
        ``PREMIUM_PLUS`` or ``VIP_PLUS`` subscription plan. For each one the
        temp file is read and sent to ``AIService.analyze_document_image``;
        when that returns a truthy result the image is re-encoded into
        ``NAS_BASE_PATH/<folder_slug>/<doc_type>/<doc.id>.jpg``, the row is
        updated (``ocr_data``, ``file_link``, ``status = "processed"``) and
        committed, and finally the temp file is removed.

        A failure while handling one document is logged with its traceback
        and rolled back; the remaining documents are still processed.
        """
        loop = asyncio.get_running_loop()

        async with SessionLocal() as db:
            # 1. Only queued documents owned by users with a premium plan.
            stmt = (
                select(Document, User)
                .join(User)
                .where(
                    Document.status == "pending_ocr",
                    User.subscription_plan.in_(["PREMIUM_PLUS", "VIP_PLUS"]),
                )
                .limit(10)
            )
            res = await db.execute(stmt)

            # Snapshot every scalar we need *before* any commit/rollback.
            # Those calls can expire the loaded ORM instances, and reading an
            # expired attribute would attempt implicit lazy I/O, which an
            # AsyncSession does not allow.
            jobs = [
                (
                    doc,
                    doc.id,
                    doc.filename,
                    doc.temp_path,
                    doc.doc_type,
                    user.id,
                    user.folder_slug,
                )
                for doc, user in res.all()
            ]

            for doc, doc_id, filename, temp_path, doc_type, user_id, folder_slug in jobs:
                try:
                    logger.info(f"📸 OCR feldolgozás: {filename} (User: {user_id})")

                    # 2. AI OCR call. The file read runs in the default
                    # executor so it does not block the event loop.
                    image_bytes = await loop.run_in_executor(
                        None, cls._read_bytes, temp_path
                    )
                    ocr_result = await AIService.analyze_document_image(
                        image_bytes, doc_type
                    )

                    if not ocr_result:
                        # NOTE(review): the document stays in "pending_ocr" and
                        # will be picked up again on the next run — confirm
                        # that unlimited retries are intended.
                        continue

                    # 3. Store the image in the user's archive folder.
                    target_dir = os.path.join(NAS_BASE_PATH, folder_slug, doc_type)
                    os.makedirs(target_dir, exist_ok=True)
                    final_path = os.path.join(target_dir, f"{doc_id}.jpg")
                    await loop.run_in_executor(
                        None, cls.resize_and_save, temp_path, final_path
                    )

                    # 4. Update the database row.
                    doc.ocr_data = ocr_result
                    doc.file_link = final_path
                    doc.status = "processed"
                    await db.commit()
                except Exception as e:
                    # Per-document boundary: log with traceback, undo this
                    # document's changes, and move on to the next one.
                    logger.exception(f"❌ OCR Hiba ({doc_id}): {e}")
                    await db.rollback()
                else:
                    # Delete the temp file only after a successful commit, so
                    # a failed commit never leaves a pending row without its
                    # source image. The row is already persisted, so a cleanup
                    # failure is logged but must not trigger a rollback.
                    try:
                        os.remove(temp_path)
                    except OSError as cleanup_error:
                        logger.warning(
                            "Could not remove temp file %s for document %s: %s",
                            temp_path,
                            doc_id,
                            cleanup_error,
                        )

    @staticmethod
    def _read_bytes(path):
        """Return the full binary content of the file at *path*."""
        with open(path, "rb") as f:
            return f.read()

    @staticmethod
    def resize_and_save(source, target) -> None:
        """Re-encode the image at *source* as an RGB JPEG written to *target*.

        The image is saved with ``quality=85`` and ``optimize=True``.

        NOTE(review): despite the name, no resizing is performed here; the
        image keeps its original dimensions. Confirm whether thumbnail /
        standard-size variants are still expected.
        """
        with Image.open(source) as img:
            img.convert('RGB').save(target, "JPEG", quality=85, optimize=True)


if __name__ == "__main__":
    asyncio.run(OCRRobot.process_queue())