# /app/app/workers/vehicle/vehicle_robot_3_alchemist_pro.py
import asyncio
import logging
import datetime
import random
import sys
import warnings

from sqlalchemy import select, and_, update, func, case

from app.database import AsyncSessionLocal
from app.models.vehicle_definitions import VehicleModelDefinition
from app.models.asset import AssetCatalog
from app.services.ai_service import AIService

# Silence RuntimeWarnings attributed to duckduckgo_search; the filter is
# registered before the import below.
warnings.filterwarnings("ignore", category=RuntimeWarning, module='duckduckgo_search')
from duckduckgo_search import DDGS

# MB 2.0 strict logging
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s [%(levelname)s] Vehicle-Alchemist-Pro: %(message)s',
    stream=sys.stdout
)
logger = logging.getLogger("Vehicle-Robot-3-Alchemist-Pro")


class TechEnricher:
    """
    Vehicle Robot 3: Industrial Alchemist (Pro Edition).

    Responsible for producing the MDM "Gold" records using hybrid
    (RDW + AI + Web) logic.
    """

    def __init__(self):
        # Records whose `attempts` reached this value are no longer picked up
        # by run(); the failing attempt that reaches it marks them "suspended".
        self.max_attempts = 5
        # Number of record ids fetched per polling round.
        self.batch_size = 10
        # Daily cap on AI service calls, and the running counter for today.
        self.daily_ai_limit = 500
        self.ai_calls_today = 0
        self.last_reset_date = datetime.date.today()
        # Upper bound (seconds) for a single web search.
        self.search_timeout = 15.0

    def check_budget(self) -> bool:
        """Return True while today's AI call budget is not exhausted.

        The counter is reset the first time this is called on a new day.
        """
        today = datetime.date.today()
        if today > self.last_reset_date:
            self.ai_calls_today = 0
            self.last_reset_date = today
        return self.ai_calls_today < self.daily_ai_limit

    def is_data_sane(self, data: dict, rdw_kw: int, rdw_ccm: int) -> bool:
        """Anti-hallucination guard: technical sanity check of the AI payload.

        When official RDW figures are available (`rdw_kw` or `rdw_ccm` > 0)
        the payload is accepted as-is, because the key figures are taken from
        RDW anyway. Otherwise the payload must carry a plausible `ccm`/`kw`.

        Returns:
            True when the data may be used, False otherwise (including when
            the values cannot be interpreted as integers).
        """
        # Official data present: "sane"; the hybrid merge handles the rest.
        if rdw_kw > 0 or rdw_ccm > 0:
            return True

        try:
            if not data:
                return False
            ccm = int(data.get("ccm", 0) or 0)
            kw = int(data.get("kw", 0) or 0)
            vehicle_type = data.get("vehicle_type")

            # Do not admit completely empty data when there is no RDW backing.
            if ccm == 0 and kw == 0 and vehicle_type != "trailer":
                return False
            if ccm > 16000 or (kw > 1500 and vehicle_type != "truck"):
                return False
            return True
        except (TypeError, ValueError, OverflowError, AttributeError) as e:
            logger.debug(f"Data Sane Error: {e}")
            return False

    async def get_web_wisdom(self, make: str, model: str) -> str:
        """Collect text snippets from DuckDuckGo for the given vehicle.

        The blocking search runs in a worker thread and is bounded by
        `self.search_timeout`.

        Returns:
            The result bodies joined by newlines, or "" on timeout, error or
            when nothing was found.
        """
        query = f"{make} {model} technical specifications engine code fuel"

        def sync_search() -> str:
            with DDGS() as ddgs:
                results = ddgs.text(query, max_results=3)
                if not results:
                    return ""
                # Skip hits without a body instead of failing the whole search.
                return "\n".join(r["body"] for r in results if r.get("body"))

        try:
            return await asyncio.wait_for(
                asyncio.to_thread(sync_search), timeout=self.search_timeout
            )
        except asyncio.TimeoutError:
            logger.warning(f"⏱️ Web keresési időtúllépés ({make} {model})")
            return ""
        except Exception as e:
            logger.warning(f"🌐 Keresési hiba ({make}): {e}")
            return ""

    async def _ask_ai(self, make: str, model: str, sources: dict):
        """Call the AI service once and charge the call to the daily budget.

        The counter is bumped before awaiting so that calls which raise are
        charged as well.
        """
        self.ai_calls_today += 1
        return await AIService.get_clean_vehicle_data(make, model, sources)

    async def _register_failure(self, record_id: int, error: Exception) -> None:
        """Record a failed enrichment attempt on the staging row.

        Increments `attempts`, stores a truncated error message and moves the
        row to "suspended" once this attempt reaches `self.max_attempts`,
        otherwise back to "unverified". Database errors here are only logged.
        """
        try:
            async with AsyncSessionLocal() as db:
                await db.execute(
                    update(VehicleModelDefinition)
                    .where(VehicleModelDefinition.id == record_id)
                    .values(
                        attempts=VehicleModelDefinition.attempts + 1,
                        last_error=str(error)[:200],
                        # Compared against the pre-increment value, hence "- 1".
                        status=case(
                            (VehicleModelDefinition.attempts >= self.max_attempts - 1, "suspended"),
                            else_="unverified"
                        ),
                        updated_at=func.now()
                    )
                )
                await db.commit()
        except Exception as db_err:
            logger.critical(f"💀 Végzetes adatbázis hiba a fallback mentésnél: {db_err}")

    async def process_single_record(self, record_id: int):
        """Enrich one record: Read -> AI Process -> Hybrid Gold Data Merge.

        Steps:
            1. Lock and read the staging row, mark it as in progress.
            2. Ask the AI (optionally after an extra web search) and run the
               sanity check.
            3. Merge RDW and AI data (RDW wins), create the catalog entry if
               missing and update the staging row.
            4. On any failure in steps 2-3, register the failed attempt.

        Returns None in every case; errors are logged, not raised.
        """
        make, m_name, v_type = "", "", "car"
        web_context = ""
        # Holder for the official RDW figures.
        rdw_kw, rdw_ccm, rdw_fuel, rdw_engine = 0, 0, "petrol", ""

        # STEP 1: read and status change
        try:
            async with AsyncSessionLocal() as db:
                res = await db.execute(
                    select(VehicleModelDefinition)
                    .where(VehicleModelDefinition.id == record_id)
                    .with_for_update(skip_locked=True)
                )
                rec = res.scalar_one_or_none()
                if not rec:
                    # Row is gone or skipped because it is locked elsewhere.
                    return

                make = rec.make
                m_name = rec.marketing_name
                v_type = rec.vehicle_class or "car"
                web_context = rec.raw_search_context or ""

                # Keep the official RDW data already stored on the row.
                rdw_kw = rec.power_kw or 0
                rdw_ccm = rec.engine_capacity or 0
                rdw_fuel = rec.fuel_type or "petrol"
                rdw_engine = rec.engine_code or ""

                rec.status = "ai_synthesis_in_progress"
                await db.commit()
        except Exception as e:
            logger.error(f"🚨 Adatbázis hiba olvasáskor (ID: {record_id}): {e}")
            return

        # STEP 2: AI and web work
        try:
            logger.info(f"🧠 AI elemzés indul: {make} {m_name}")

            # The RDW figures are passed to the AI as extra context as well.
            sources_dict = {
                "web_context": web_context,
                "vehicle_class": v_type,
                "rdw_kw": rdw_kw,
                "rdw_ccm": rdw_ccm
            }
            ai_data = await self._ask_ai(make, m_name, sources_dict)

            # Weak AI answer and no RDW power figure either: search the web.
            if (not ai_data or not ai_data.get("kw")) and rdw_kw == 0:
                logger.info(f"🔍 Adathiány, extra webes mélyfúrás: {make} {m_name}")
                extra_web_info = await self.get_web_wisdom(make, m_name)
                # Only retry when the search produced something; an empty
                # result would just burn budget on less context than before.
                if extra_web_info:
                    sources_dict["web_context"] = extra_web_info
                    ai_data = await self._ask_ai(make, m_name, sources_dict)

            # Hybrid sanity check
            if not ai_data:
                ai_data = {}
            if not self.is_data_sane(ai_data, rdw_kw, rdw_ccm):
                raise ValueError("Az AI válasza hallucinált ÉS hivatalos RDW adatunk sincs.")

            # HYBRID DATA MERGE: RDW (official) > AI (generated)
            final_kw = rdw_kw if rdw_kw > 0 else (ai_data.get("kw") or 0)
            final_ccm = rdw_ccm if rdw_ccm > 0 else (ai_data.get("ccm") or 0)

            # `or` fallbacks so an explicit null from the AI does not leak through.
            if rdw_fuel and rdw_fuel != "Unknown":
                final_fuel = rdw_fuel
            else:
                final_fuel = ai_data.get("fuel_type") or "petrol"
            final_engine = rdw_engine or ai_data.get("engine_code") or "Nincs adat"

            # Refresh the JSON payload with the merged values too.
            ai_data["kw"] = final_kw
            ai_data["ccm"] = final_ccm
            ai_data["engine_code"] = final_engine

            # STEP 3: save the gold record
            async with AsyncSessionLocal() as db:
                make_upper = make.upper()
                clean_model = str(ai_data.get("marketing_name") or m_name)[:50].upper()

                # Catalog entries are matched on make + model + exact kW.
                cat_stmt = select(AssetCatalog).where(and_(
                    AssetCatalog.make == make_upper,
                    AssetCatalog.model == clean_model,
                    AssetCatalog.power_kw == final_kw
                )).limit(1)

                existing_cat = (await db.execute(cat_stmt)).scalar_one_or_none()

                if not existing_cat:
                    db.add(AssetCatalog(
                        make=make_upper,
                        model=clean_model,
                        power_kw=final_kw,
                        engine_capacity=final_ccm,
                        fuel_type=final_fuel,
                        vehicle_class=v_type,
                        factory_data=ai_data  # enriched JSON
                    ))
                    logger.info(f"✨ ÚJ ARANY REKORD (HIBRID): {make.upper()} {clean_model} ({final_ccm}ccm, {final_kw}kW)")

                # Update the staging row with the merged values.
                await db.execute(
                    update(VehicleModelDefinition)
                    .where(VehicleModelDefinition.id == record_id)
                    .values(
                        status="gold_enriched",
                        technical_code=ai_data.get("technical_code") or f"REF-{record_id}",
                        engine_capacity=final_ccm,
                        power_kw=final_kw,
                        updated_at=func.now()
                    )
                )
                await db.commit()

        except Exception as e:
            # STEP 4: error handling
            logger.error(f"🚨 Hiba a(z) {record_id} rekordnál ({make} {m_name}): {e}")
            await self._register_failure(record_id, e)

    async def run(self):
        """Poll for pending records forever and enrich them one by one.

        Sleeps one hour when the AI budget is exhausted, 60 seconds when no
        work is found and 10 seconds after an unexpected error in the loop.
        """
        logger.info(f"🚀 Alchemist Pro HIBRID ONLINE (Napi limit: {self.daily_ai_limit})")
        while True:
            try:
                if not self.check_budget():
                    logger.warning("💰 AI Keret kimerült. Alvás 1 órát.")
                    await asyncio.sleep(3600)
                    continue

                async with AsyncSessionLocal() as db:
                    stmt = select(VehicleModelDefinition.id).where(and_(
                        VehicleModelDefinition.status.in_(["unverified", "awaiting_ai_synthesis"]),
                        VehicleModelDefinition.attempts < self.max_attempts
                    )).limit(self.batch_size)
                    res = await db.execute(stmt)
                    ids = [r[0] for r in res.fetchall()]

                if not ids:
                    await asyncio.sleep(60)
                    continue

                for rid in ids:
                    # Re-check per record so a batch cannot overshoot the
                    # daily limit; the outer loop then handles the long sleep.
                    if not self.check_budget():
                        break
                    await self.process_single_record(rid)
                    await asyncio.sleep(random.uniform(5.0, 15.0))  # go easy on the GPU

            except Exception as e:
                logger.error(f"💀 Kritikus hiba a főciklusban: {e}")
                await asyncio.sleep(10)


if __name__ == "__main__":
    try:
        asyncio.run(TechEnricher().run())
    except KeyboardInterrupt:
        logger.info("🛑 Alchemist Pro leállítva.")