Cleanup: MB 2.0 Gap Analysis előtti állapot (adatok kizárva)

This commit is contained in:
2026-02-23 09:44:02 +01:00
parent 5757754aae
commit 893f39fa15
74 changed files with 34239 additions and 2834 deletions

View File

@@ -0,0 +1,111 @@
import asyncio
import logging
from sqlalchemy import select, update, func, and_, case # JAVÍTVA: and_ és case importálva
from app.db.session import SessionLocal
from app.models.vehicle_definitions import VehicleModelDefinition
from app.services.ai_service import AIService
# Fine-tune logging: INFO level, every record prefixed with timestamp, level and logger name
logging.basicConfig(level=logging.INFO, format='%(asctime)s [%(levelname)s] %(name)s: %(message)s')
logger = logging.getLogger("Robot-Alchemist-v2.2")
class AlchemistBot:
    """AI enrichment worker ("Alchemist", robot 2.2).

    Polls ``VehicleModelDefinition`` rows in status ``awaiting_ai_synthesis``
    in priority order, asks ``AIService`` to extract structured data from each
    row's ``raw_search_context`` and stores the outcome.
    """

    def __init__(self):
        # Small batches to spare GPU VRAM (alongside Ollama parallelism).
        self.batch_size = 5
        # Seconds to pause between records (cool-down for the Quadro P4000).
        self.delay_between_records = 12

    async def synthesize_vehicle(self, vehicle_id: int):
        """Run the AI enrichment for one record from its collected context.

        Args:
            vehicle_id: Primary key of the ``VehicleModelDefinition`` row.

        The row is set to ``ai_synthesis_in_progress`` while the AI call runs
        and ends up ``gold_enriched`` on success, or back in
        ``awaiting_ai_synthesis`` with the attempt counter incremented when
        the AI call fails, raises or returns nothing.
        """
        async with SessionLocal() as db:
            res = await db.execute(
                select(VehicleModelDefinition).where(VehicleModelDefinition.id == vehicle_id)
            )
            v = res.scalar_one_or_none()
            if not v or not v.raw_search_context:
                # NOTE(review): the status is left untouched here, so the row
                # still matches the polling query on the next pass — confirm
                # this is intended.
                logger.warning(f"⚠️ Nincs kontextus az ID:{vehicle_id} rekordhoz, átugrás.")
                return

            # Copy everything needed later into plain locals: after commit()
            # and session close the ORM instance may be expired or detached.
            make, model = v.make, v.marketing_name
            context = v.raw_search_context
            current_code = v.technical_code
            logger.info(f"🧪 Arany dúsítás indul (AI Synthesis): {make} {model}")

            # Lock the status for the duration of the processing.
            await db.execute(
                update(VehicleModelDefinition)
                .where(VehicleModelDefinition.id == vehicle_id)
                .values(status='ai_synthesis_in_progress')
            )
            await db.commit()

        # AI call: extract the "gold" data from the raw research dump.
        try:
            gold_data = await AIService.get_gold_data_from_research(make, model, context)
        except Exception:
            # Treat a crashing AI call like an empty answer so the row is
            # re-queued instead of staying 'ai_synthesis_in_progress' forever.
            logger.exception(f"AI call raised for ID:{vehicle_id}")
            gold_data = None

        async with SessionLocal() as db:
            if gold_data:
                # Extract and normalise the values.
                ccm = gold_data.get("ccm")
                kw = gold_data.get("kw")
                # Fall back to the stored name when the AI returns null/empty.
                raw_name = gold_data.get("marketing_name") or model
                m_name = str(raw_name)[:50] if raw_name is not None else None
                t_code = gold_data.get("technical_code")
                await db.execute(
                    update(VehicleModelDefinition)
                    .where(VehicleModelDefinition.id == vehicle_id)
                    .values(
                        marketing_name=m_name,
                        technical_code=t_code or current_code,
                        engine_capacity=ccm,
                        power_kw=kw,
                        features_json=gold_data,  # the complete technical JSON (oil, tyres, ...)
                        status='gold_enriched',
                        updated_at=func.now(),
                    )
                )
                logger.info(f"✨ GOLD ENRICHED: {make} {m_name} ({ccm} ccm, {kw} kW)")
            else:
                # On failure put the row back into the queue. The counter is
                # incremented in SQL (atomic, NULL-safe) and updated_at is
                # bumped so the row moves behind its untried peers.
                await db.execute(
                    update(VehicleModelDefinition)
                    .where(VehicleModelDefinition.id == vehicle_id)
                    .values(
                        status='awaiting_ai_synthesis',
                        attempts=func.coalesce(VehicleModelDefinition.attempts, 0) + 1,
                        last_error="AI extraction failed or returned empty",
                        updated_at=func.now(),
                    )
                )
                logger.warning(f"⚠️ Sikertelen dúsítás: {make} {model}")
            await db.commit()

    async def run(self):
        """Poll the queue forever and enrich records batch by batch."""
        logger.info("🚀 Robot 2.2 (Alchemist) ONLINE - Prioritásos feldolgozás")

        # --- PRIORITY LOGIC (same as the Researcher bot) ---
        # Loop-invariant, so built once.
        priorities = case(
            (and_(VehicleModelDefinition.vehicle_type == 'car',
                  VehicleModelDefinition.make.in_(['SUZUKI', 'TOYOTA', 'SKODA', 'VOLKSWAGEN', 'OPEL'])), 1),
            (VehicleModelDefinition.vehicle_type == 'car', 2),
            (and_(VehicleModelDefinition.vehicle_type == 'motorcycle',
                  VehicleModelDefinition.make.in_(['HONDA', 'YAMAHA', 'SUZUKI', 'KAWASAKI'])), 3),
            else_=4
        )

        while True:
            async with SessionLocal() as db:
                # Order by priority, then by least recently updated.
                stmt = select(VehicleModelDefinition.id).where(
                    VehicleModelDefinition.status == 'awaiting_ai_synthesis'
                ).order_by(priorities, VehicleModelDefinition.updated_at.asc()).limit(self.batch_size)
                res = await db.execute(stmt)
                ids = [r[0] for r in res.fetchall()]

            if not ids:
                # Queue is empty: rest and wait for the collector.
                await asyncio.sleep(20)
                continue

            for vid in ids:
                try:
                    await self.synthesize_vehicle(vid)
                except Exception:
                    # One bad record must not take the whole worker down.
                    logger.exception(f"Unhandled error while processing ID:{vid}")
                # Quadro P4000 cool-down and Ollama API relief.
                await asyncio.sleep(self.delay_between_records)
# Script entry point: create the bot and drive its polling loop with asyncio.
if __name__ == "__main__":
    asyncio.run(AlchemistBot().run())

View File

@@ -1,208 +1,136 @@
import asyncio
import httpx
import logging
import json
import os
import datetime
from sqlalchemy import text
import sys
from sqlalchemy import text, select
from app.db.session import SessionLocal
from app.models.asset import AssetCatalog
from app.models.vehicle_definitions import VehicleModelDefinition
# Logging setup. A single basicConfig call: a second call is a no-op once the
# root logger has a handler, so an earlier format-less call would silently
# discard the timestamped format below.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s [%(levelname)s] %(name)s: %(message)s'
)
logger = logging.getLogger("Hunter-v2.4-Paginator")
class CatalogHunter:
    """Hunter robot, paginator v2.4.

    Walks the RDW open-data vehicle registry one make at a time, page by page,
    and stages every make/model/ccm/kW combination that is not yet present as
    an ``unverified`` ``VehicleModelDefinition`` row.

    The enrichment helpers of the earlier "Global Hunter" revision
    (``clean_kw``, ``clean_int``, ``fetch_api``, ``get_deep_tech``) are kept so
    existing callers continue to work.
    """

    # API endpoints
    RDW_MAIN = "https://opendata.rdw.nl/resource/m9d7-ebf2.json"
    RDW_FUEL = "https://opendata.rdw.nl/resource/8ys7-d773.json"
    RDW_AXLE = "https://opendata.rdw.nl/resource/3huj-srit.json"
    RDW_BODY = "https://opendata.rdw.nl/resource/vezc-m2t6.json"
    UK_DVLA = "https://driver-vehicle-licensing.api.gov.uk/vehicle-enquiry/v1/vehicles"
    US_NHTSA = "https://vpic.nhtsa.dot.gov/api/vehicles/DecodeVinValuesBatch/"
    RDW_TOKEN = os.getenv("RDW_APP_TOKEN")
    UK_API_KEY = os.getenv("UK_DVLA_API_KEY")
    HEADERS_RDW = {"X-App-Token": RDW_TOKEN} if RDW_TOKEN else {}
    HEADERS_UK = {"x-api-key": UK_API_KEY, "Content-Type": "application/json"} if UK_API_KEY else {}
    CATEGORY_MAP = {
        "Personenauto": "car",
        "Motorfiets": "motorcycle",
        "Bedrijfsauto": "truck",
        "Vrachtwagen": "truck",
        "Opleggertrekker": "truck",
        "Bus": "bus",
        "Aanhangwagen": "trailer",
        "Oplegger": "trailer",
        "Landbouw- of bosbouwtrekker": "agricultural",
        "camper": "camper"
    }

    @staticmethod
    def _make_filter(make_name):
        """Build the SoQL ``$where`` clause that matches a make.

        Single quotes are doubled so a make containing an apostrophe cannot
        break out of the string literal.
        """
        escaped = str(make_name).upper().replace("'", "''")
        return f"upper(merk) like '%{escaped}%'"

    @classmethod
    def clean_kw(cls, val):
        """Parse a kW value; None for missing, invalid or sub-1.0 (kW/kg ratio) input."""
        try:
            if val is None:
                return None
            f_val = float(str(val).replace(',', '.'))
            if 0 < f_val < 1.0:
                return None  # a kW/kg ratio, not a power figure
            v = int(f_val)
            return v if v > 0 else None
        except (ValueError, TypeError, OverflowError):
            # OverflowError: int() of an infinite float.
            return None

    @classmethod
    def clean_int(cls, val):
        """Parse a number (decimal comma allowed) to int; None if missing or invalid."""
        try:
            if val is None:
                return None
            return int(float(str(val).replace(',', '.')))
        except (ValueError, TypeError, OverflowError):
            return None

    @classmethod
    async def fetch_api(cls, url, params=None, headers=None, method="GET", json_data=None):
        """Call an API with a fixed 1.2 s throttle; return parsed JSON or ``[]`` on any failure."""
        async with httpx.AsyncClient(headers=headers, follow_redirects=True) as client:
            try:
                await asyncio.sleep(1.2)  # safety delay
                if method == "POST":
                    resp = await client.post(url, json=json_data, timeout=30)
                else:
                    resp = await client.get(url, params=params, timeout=30)
                return resp.json() if resp.status_code in [200, 201] else []
            except Exception as e:
                logger.error(f"❌ API Hiba ({url}): {e}")
                return []

    @classmethod
    async def get_deep_tech(cls, plate, main_kw=None, vin=None):
        """Enrich one vehicle from RDW, then DVLA, then NHTSA.

        Returns a dict with the keys ``kw``, ``fuel``, ``axles``, ``body`` and
        ``euro``; fields nobody could supply keep their defaults.
        """
        res = {"kw": cls.clean_kw(main_kw), "fuel": "Unknown", "axles": None, "body": "Standard", "euro": None}

        # 1. Dutch (RDW) enrichment
        fuel_data = await cls.fetch_api(cls.RDW_FUEL, {"kenteken": plate}, headers=cls.HEADERS_RDW)
        if fuel_data:
            f0 = fuel_data[0]
            if not res["kw"]:
                res["kw"] = cls.clean_kw(f0.get("nettomaximumvermogen") or f0.get("netto_maximum_vermogen"))
            res["fuel"] = f0.get("brandstof_omschrijving", "Unknown")
            res["euro"] = f0.get("uitlaatemissieniveau")

        # 2. British (DVLA) check, only for the Euro class. DVLA's
        # ``engineCapacity`` is a displacement in cc, so it must not be
        # stored as kW.
        if cls.UK_API_KEY and not res["euro"]:
            uk_data = await cls.fetch_api(cls.UK_DVLA, method="POST",
                                          json_data={"registrationNumber": plate}, headers=cls.HEADERS_UK)
            if isinstance(uk_data, dict):
                res["euro"] = uk_data.get("euroStatus")

        # 3. American (NHTSA) lookup: only needed while kW is still unknown.
        if vin and len(vin) == 17 and not res["kw"]:
            us_data = await cls.fetch_api(cls.US_NHTSA, params={"format": "json", "data": vin})
            results = us_data.get("Results") if isinstance(us_data, dict) else None
            if results:
                try:
                    hp = float(results[0].get("EngineHP") or 0)
                except (TypeError, ValueError):
                    hp = 0.0
                if hp > 0:
                    res["kw"] = int(hp * 0.7457) or None  # HP -> kW

        # RDW extras (axles, body)
        axle = await cls.fetch_api(cls.RDW_AXLE, {"kenteken": plate}, headers=cls.HEADERS_RDW)
        if axle:
            res["axles"] = cls.clean_int(axle[0].get("aantal_assen"))
        body = await cls.fetch_api(cls.RDW_BODY, {"kenteken": plate}, headers=cls.HEADERS_RDW)
        if body:
            res["body"] = body[0].get("carrosserie_omschrijving", "Standard")
        return res

    @classmethod
    async def get_total_count(cls, client, make_name):
        """Return how many RDW records exist for the make (0 on any failure)."""
        params = {
            "$where": cls._make_filter(make_name),
            "$select": "count(*)"
        }
        try:
            resp = await client.get(cls.RDW_MAIN, params=params, headers=cls.HEADERS_RDW)
            if resp.status_code == 200:
                data = resp.json()
                return int(data[0]['count'])
            return 0
        except Exception as e:
            logger.error(f"⚠️ Nem sikerült a számlálás: {e}")
            return 0

    @classmethod
    async def process_make(cls, db, task_id, make_name):
        """Page through all RDW rows of one make and stage the new variants.

        Args:
            db: Open async DB session; committed once per page.
            task_id: ``data.catalog_discovery`` row to mark ``processed``.
            make_name: Make to search for (trimmed and upper-cased here).
        """
        clean_make = make_name.strip().upper()
        async with httpx.AsyncClient(timeout=60) as client:
            # STEP 1: count all matching records.
            total_available = await cls.get_total_count(client, clean_make)
            logger.info(f"🚀 >>> {clean_make} feltérképezése: {total_available} variáns található az RDW-ben.")
            if total_available == 0:
                logger.warning(f"⚠️ {clean_make} márkához nem érkezett adat az API-tól.")
                await db.execute(text("UPDATE data.catalog_discovery SET status = 'processed' WHERE id = :id"), {"id": task_id})
                await db.commit()
                return

            # STEP 2: pagination.
            limit = 1000
            offset = 0
            total_added = 0
            where_clause = cls._make_filter(clean_make)
            while offset < total_available:
                logger.info(f"📑 Lapozás: {clean_make} | {offset} -> {offset + limit} (Összesen: {total_available})")
                params = {
                    "$where": where_clause,
                    "$limit": limit,
                    "$offset": offset,
                    "$order": ":id"  # recommended by Socrata for stable paging
                }
                resp = await client.get(cls.RDW_MAIN, params=params, headers=cls.HEADERS_RDW)
                if resp.status_code != 200:
                    logger.error(f"❌ Hiba a lapozásnál ({offset}): {resp.status_code}")
                    break
                batch = resp.json()
                if not batch:
                    break

                for item in batch:
                    res_make = str(item.get("merk", clean_make)).upper()
                    model = str(item.get("handelsbenaming", "Unknown")).upper()
                    # Tolerant parsing: a malformed value must not abort the page.
                    ccm = cls.clean_int(item.get("cilinderinhoud")) or 0
                    kw = cls.clean_kw(item.get("netto_maximum_vermogen"))

                    # De-duplication check. Unknown power is stored as NULL,
                    # so it has to be matched with IS NULL, not with 0.
                    if kw is None:
                        kw_match = VehicleModelDefinition.power_kw.is_(None)
                    else:
                        kw_match = VehicleModelDefinition.power_kw == kw
                    stmt = select(VehicleModelDefinition.id).where(
                        VehicleModelDefinition.make == res_make,
                        VehicleModelDefinition.marketing_name == model,
                        VehicleModelDefinition.engine_capacity == ccm,
                        kw_match
                    ).limit(1)
                    exists = (await db.execute(stmt)).scalar_one_or_none()
                    if not exists:
                        db.add(VehicleModelDefinition(
                            make=res_make,
                            technical_code=item.get("kenteken"),
                            marketing_name=model,
                            engine_capacity=ccm,
                            power_kw=kw,
                            status="unverified",
                            source="HUNTER-v2.4-PAGINATED"
                        ))
                        total_added += 1

                await db.commit()  # save at the end of each page
                offset += limit

            # STEP 3: finish.
            await db.execute(text("UPDATE data.catalog_discovery SET status = 'processed' WHERE id = :id"), {"id": task_id})
            await db.commit()
            logger.info(f"{clean_make} KÉSZ. {total_available} rekord átnézve, {total_added} új variáns stagingbe mentve.")

    @classmethod
    async def run(cls):
        """Pick pending discovery tasks (priority makes first) and process them forever."""
        logger.info("🤖 Robot 1 (Hunter) ONLINE - Paginator v2.4")
        while True:
            async with SessionLocal() as db:
                query = text("""
                    SELECT id, make FROM data.catalog_discovery
                    WHERE status = 'pending'
                    ORDER BY
                        CASE WHEN make IN ('SUZUKI', 'TOYOTA', 'SKODA', 'VOLKSWAGEN', 'OPEL') THEN 1 ELSE 2 END,
                        id ASC
                    LIMIT 1 FOR UPDATE SKIP LOCKED
                """)
                res = await db.execute(query)
                task = res.fetchone()
                if task:
                    task_id, make_name = task[0], task[1]
                    # The row lock ends with the first commit inside
                    # process_make, so claim the task explicitly first.
                    await db.execute(
                        text("UPDATE data.catalog_discovery SET status = 'running' WHERE id = :id"),
                        {"id": task_id}
                    )
                    await db.commit()
                    await cls.process_make(db, task_id, make_name)
                else:
                    await asyncio.sleep(20)


# Backward-compatible name of the previous revision.
CatalogMaster = CatalogHunter

if __name__ == "__main__":
    asyncio.run(CatalogHunter.run())

View File

@@ -0,0 +1,208 @@
import asyncio
import httpx
import logging
import json
import os
import datetime
from sqlalchemy import text
from app.db.session import SessionLocal
from app.models.asset import AssetCatalog
# Root logging at INFO with the default format; module logger named after the robot version.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger("Robot-v1.0.13-Global-Hunter")
class CatalogMaster:
    """Master Hunter Robot v1.0.13 - Global Hunter Edition.

    - Integrates the Dutch (RDW), British (DVLA) and American (NHTSA) databases.
    - Ratio filter: drops kW/kg ratio values such as 0.19.
    - Multi-field power discovery: reads kW from every candidate field.
    - Uses the registration year in the variant key to avoid duplicates.
    """

    # API endpoints
    RDW_MAIN = "https://opendata.rdw.nl/resource/m9d7-ebf2.json"
    RDW_FUEL = "https://opendata.rdw.nl/resource/8ys7-d773.json"
    RDW_AXLE = "https://opendata.rdw.nl/resource/3huj-srit.json"
    RDW_BODY = "https://opendata.rdw.nl/resource/vezc-m2t6.json"
    UK_DVLA = "https://driver-vehicle-licensing.api.gov.uk/vehicle-enquiry/v1/vehicles"
    US_NHTSA = "https://vpic.nhtsa.dot.gov/api/vehicles/DecodeVinValuesBatch/"
    RDW_TOKEN = os.getenv("RDW_APP_TOKEN")
    UK_API_KEY = os.getenv("UK_DVLA_API_KEY")
    HEADERS_RDW = {"X-App-Token": RDW_TOKEN} if RDW_TOKEN else {}
    HEADERS_UK = {"x-api-key": UK_API_KEY, "Content-Type": "application/json"} if UK_API_KEY else {}
    CATEGORY_MAP = {
        "Personenauto": "car",
        "Motorfiets": "motorcycle",
        "Bedrijfsauto": "truck",
        "Vrachtwagen": "truck",
        "Opleggertrekker": "truck",
        "Bus": "bus",
        "Aanhangwagen": "trailer",
        "Oplegger": "trailer",
        "Landbouw- of bosbouwtrekker": "agricultural",
        "camper": "camper"
    }

    @classmethod
    def clean_kw(cls, val):
        """Parse a kW value; None for missing, invalid or sub-1.0 (kW/kg ratio) input."""
        try:
            if val is None:
                return None
            f_val = float(str(val).replace(',', '.'))
            if 0 < f_val < 1.0:
                return None  # a kW/kg ratio, not a power figure
            v = int(f_val)
            return v if v > 0 else None
        except (ValueError, TypeError, OverflowError):
            # OverflowError: int() of an infinite float.
            return None

    @classmethod
    def clean_int(cls, val):
        """Parse a number (decimal comma allowed) to int; None if missing or invalid."""
        try:
            if val is None:
                return None
            return int(float(str(val).replace(',', '.')))
        except (ValueError, TypeError, OverflowError):
            return None

    @classmethod
    async def fetch_api(cls, url, params=None, headers=None, method="GET", json_data=None):
        """Call an API with a fixed 1.2 s throttle.

        Returns the parsed JSON body for a 200/201 answer and ``[]`` for any
        other status or any exception (which is logged).
        """
        async with httpx.AsyncClient(headers=headers, follow_redirects=True) as client:
            try:
                await asyncio.sleep(1.2)  # safety delay
                if method == "POST":
                    resp = await client.post(url, json=json_data, timeout=30)
                else:
                    resp = await client.get(url, params=params, timeout=30)
                return resp.json() if resp.status_code in [200, 201] else []
            except Exception as e:
                logger.error(f"❌ API Hiba ({url}): {e}")
                return []

    @classmethod
    async def get_deep_tech(cls, plate, main_kw=None, vin=None):
        """International enrichment in the order Dutch -> British -> American.

        Args:
            plate: Registration number used for the RDW and DVLA lookups.
            main_kw: Raw power candidate from the main RDW record, if any.
            vin: Optional VIN; only a 17-character value is sent to NHTSA.

        Returns:
            Dict with the keys ``kw``, ``fuel``, ``axles``, ``body``, ``euro``.
        """
        res = {"kw": cls.clean_kw(main_kw), "fuel": "Unknown", "axles": None, "body": "Standard", "euro": None}

        # 1. Dutch (RDW) enrichment
        fuel_data = await cls.fetch_api(cls.RDW_FUEL, {"kenteken": plate}, headers=cls.HEADERS_RDW)
        if fuel_data:
            f0 = fuel_data[0]
            if not res["kw"]:
                res["kw"] = cls.clean_kw(f0.get("nettomaximumvermogen") or f0.get("netto_maximum_vermogen"))
            res["fuel"] = f0.get("brandstof_omschrijving", "Unknown")
            res["euro"] = f0.get("uitlaatemissieniveau")

        # 2. British (DVLA) check, only for the Euro class. DVLA's
        # ``engineCapacity`` is a displacement in cc, so it must not be
        # stored as kW.
        if cls.UK_API_KEY and not res["euro"]:
            uk_data = await cls.fetch_api(cls.UK_DVLA, method="POST",
                                          json_data={"registrationNumber": plate}, headers=cls.HEADERS_UK)
            if isinstance(uk_data, dict):
                res["euro"] = uk_data.get("euroStatus")

        # 3. American (NHTSA) lookup: only needed while kW is still unknown.
        if vin and len(vin) == 17 and not res["kw"]:
            us_data = await cls.fetch_api(cls.US_NHTSA, params={"format": "json", "data": vin})
            results = us_data.get("Results") if isinstance(us_data, dict) else None
            if results:
                try:
                    hp = float(results[0].get("EngineHP") or 0)
                except (TypeError, ValueError):
                    hp = 0.0
                if hp > 0:
                    res["kw"] = int(hp * 0.7457) or None  # HP -> kW

        # RDW extras (axles, body)
        axle = await cls.fetch_api(cls.RDW_AXLE, {"kenteken": plate}, headers=cls.HEADERS_RDW)
        if axle:
            res["axles"] = cls.clean_int(axle[0].get("aantal_assen"))
        body = await cls.fetch_api(cls.RDW_BODY, {"kenteken": plate}, headers=cls.HEADERS_RDW)
        if body:
            res["body"] = body[0].get("carrosserie_omschrijving", "Standard")
        return res

    @classmethod
    async def process_make(cls, db, task_id, make_name):
        """Collect the unique variants of one make from RDW, enrich and store them.

        Args:
            db: Open async DB session; every variant is committed on its own.
            task_id: ``data.catalog_discovery`` row to mark ``processed``.
            make_name: Make to query (upper-cased for the RDW filter).
        """
        logger.info(f"🚀 >>> {make_name} GlobalHunter v1.0.13 INDUL...")
        offset, limit, total_saved = 0, 1000, 0
        unique_variants = {}

        while True:
            params = {"merk": make_name.upper(), "$limit": limit, "$offset": offset}
            main_data = await cls.fetch_api(cls.RDW_MAIN, params, headers=cls.HEADERS_RDW)
            if not main_data:
                break

            for item in main_data:
                plate = item.get("kenteken")
                if not plate:
                    continue
                model = str(item.get("handelsbenaming", "Unknown")).upper()
                ccm = cls.clean_int(item.get("cilinderinhoud"))
                weight = cls.clean_int(item.get("massa_ledig_voertuig") or item.get("massa_rijklaar"))
                kw_candidate = item.get("netto_maximum_vermogen") or item.get("vermogen_massarijklaar")
                # A malformed date falls back to the default year instead of
                # aborting the whole make.
                raw_date = item.get("datum_eerste_toelating")
                parsed_year = cls.clean_int(str(raw_date)[:4]) if raw_date else None
                prod_year = parsed_year if parsed_year is not None else 2024
                v_class = cls.CATEGORY_MAP.get(item.get("voertuigsoort"), "other")
                if "kampeerwagen" in str(item.get("inrichting", "")).lower():
                    v_class = "camper"

                # Variant key: model + ccm + weight + class + kW + year.
                variant_key = f"{model}-{ccm}-{weight}-{v_class}-{kw_candidate}-{prod_year}"
                if variant_key not in unique_variants:
                    unique_variants[variant_key] = {
                        "model": model, "ccm": ccm, "weight": weight, "v_class": v_class,
                        "plate": plate, "main_kw": kw_candidate, "prod_year": prod_year,
                        "vin": item.get("vin")  # only if RDW supplies a VIN
                    }

            if len(main_data) < limit or offset > 90000:
                break
            offset += limit

        logger.info(f"📊 {len(unique_variants)} egyedi variáns kutatása indul...")
        for v in unique_variants.values():
            try:
                deep = await cls.get_deep_tech(v["plate"], main_kw=v["main_kw"], vin=v["vin"])
                db_item = AssetCatalog(
                    make=make_name.upper(), model=v["model"], vehicle_class=v["v_class"],
                    fuel_type=deep["fuel"], power_kw=deep["kw"], engine_capacity=v["ccm"],
                    max_weight_kg=v["weight"], axle_count=deep["axles"], body_type=deep["body"],
                    year_from=v["prod_year"], euro_class=deep["euro"],
                    factory_data={
                        "source": "GlobalHunter-v1.0.13",
                        "sample_plate": v["plate"],
                        "enriched_at": str(datetime.datetime.now())
                    }
                )
                db.add(db_item)
                await db.commit()
                total_saved += 1
                if total_saved % 50 == 0:
                    logger.info(f"{total_saved} variáns elmentve.")
            except Exception as e:
                # Best effort per variant, but leave a trace of what was skipped.
                await db.rollback()
                logger.warning(f"Variant skipped ({v['model']}, plate {v['plate']}): {e}")
                continue

        await db.execute(text("UPDATE data.catalog_discovery SET status = 'processed' WHERE id = :id"), {"id": task_id})
        await db.commit()
        logger.info(f"🏁 {make_name} KÉSZ. {total_saved} rekord rögzítve.")

    @classmethod
    async def run(cls):
        """Process pending discovery tasks forever, one make at a time."""
        logger.info("🤖 Robot 1.0.13 (Global Hunter) ONLINE")
        while True:
            async with SessionLocal() as db:
                res = await db.execute(text("SELECT id, make FROM data.catalog_discovery WHERE status = 'pending' LIMIT 1"))
                task = res.fetchone()
                if task:
                    await cls.process_make(db, task[0], task[1])
                else:
                    logger.info("😴 Várólista üres. Alvás 60 mp...")
                    await asyncio.sleep(60)
            await asyncio.sleep(1)
# Script entry point: run the hunter's endless task loop under asyncio.
if __name__ == "__main__":
    asyncio.run(CatalogMaster.run())

View File

@@ -0,0 +1,270 @@
import asyncio
import httpx
import logging
import json
import os
import datetime
import sys
from sqlalchemy import text
from app.db.session import SessionLocal
from app.models.asset import AssetCatalog
# --- FORCED TIMESTAMPED LOGGING ---
# Remove any handlers configured earlier so that the timestamped format below is guaranteed to apply
for handler in logging.root.handlers[:]:
    logging.root.removeHandler(handler)
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s.%(msecs)03d [%(levelname)s] %(name)s: %(message)s',
    datefmt='%Y-%m-%d %H:%M:%S',
    stream=sys.stdout
)
logger = logging.getLogger("Robot-v1.4.1-Powerhouse")
class CatalogMaster:
    """Master Hunter Robot v1.4.1 - Powerhouse Edition.

    - Parallel Dutch (RDW) and American (NHTSA batch) discovery.
    - Guaranteed timestamped logging.
    - Multi-worker safe (FOR UPDATE SKIP LOCKED).
    - Rate-limit (429) protection.
    """

    # API endpoints
    RDW_MAIN = "https://opendata.rdw.nl/resource/m9d7-ebf2.json"
    RDW_FUEL = "https://opendata.rdw.nl/resource/8ys7-d773.json"
    RDW_AXLE = "https://opendata.rdw.nl/resource/3huj-srit.json"
    RDW_BODY = "https://opendata.rdw.nl/resource/vezc-m2t6.json"
    US_BATCH = "https://vpic.nhtsa.dot.gov/api/vehicles/GetModelsForMakeYear/make/{make}/modelyear/{year}?format=json"
    # British API (can be activated once a token is available)
    UK_DVLA = "https://driver-vehicle-licensing.api.gov.uk/vehicle-enquiry/v1/vehicles"
    RDW_TOKEN = os.getenv("RDW_APP_TOKEN")
    UK_API_KEY = os.getenv("UK_DVLA_API_KEY")
    HEADERS_RDW = {"X-App-Token": RDW_TOKEN} if RDW_TOKEN else {}
    HEADERS_UK = {"x-api-key": UK_API_KEY, "Content-Type": "application/json"} if UK_API_KEY else {}
    CATEGORY_MAP = {
        "Personenauto": "car",
        "Motorfiets": "motorcycle",
        "Bedrijfsauto": "truck",
        "Vrachtwagen": "truck",
        "Opleggertrekker": "truck",
        "Bus": "bus",
        "Aanhangwagen": "trailer",
        "Oplegger": "trailer",
        "Landbouw- of bosbouwtrekker": "agricultural",
        "camper": "camper"
    }
    # Placeholder plate given to variants that come from the US model list.
    _US_PLACEHOLDER_PLATE = "US-DISCOVERY"
    # Caps parallel enrichment (at most 5 concurrent lookups per robot instance).
    semaphore = asyncio.Semaphore(5)

    @classmethod
    def clean_kw(cls, val):
        """Parse a kW value; None for missing, invalid or sub-1.0 input."""
        try:
            if val is None:
                return None
            f_val = float(str(val).replace(',', '.'))
            if 0 < f_val < 1.0:
                return None
            v = int(f_val)
            return v if v > 0 else None
        except (ValueError, TypeError, OverflowError):
            # OverflowError: int() of an infinite float.
            return None

    @classmethod
    def clean_int(cls, val):
        """Parse a number (decimal comma allowed) to int; None if missing or invalid."""
        try:
            if val is None:
                return None
            return int(float(str(val).replace(',', '.')))
        except (ValueError, TypeError, OverflowError):
            return None

    @classmethod
    async def fetch_api(cls, url, params=None, headers=None, method="GET", json_data=None):
        """Call an API with up to three attempts and 429 back-off.

        Returns the parsed JSON body for a 200/201 answer, ``[]`` for any other
        status and ``[]`` once all attempts are used up.
        """
        max_attempts = 3
        async with httpx.AsyncClient(headers=headers, follow_redirects=True) as client:
            for attempt in range(max_attempts):
                is_last = attempt == max_attempts - 1
                try:
                    if method == "POST":
                        resp = await client.post(url, json=json_data, timeout=30)
                    else:
                        resp = await client.get(url, params=params, timeout=30)
                    if resp.status_code == 429:
                        if is_last:
                            break  # nothing left to retry: do not wait for nothing
                        wait_time = (attempt + 1) * 5
                        logger.warning(f"⚠️ RATE LIMIT! Várakozás {wait_time}mp: {url}")
                        await asyncio.sleep(wait_time)
                        continue
                    return resp.json() if resp.status_code in [200, 201] else []
                except Exception as e:
                    logger.error(f"❌ API Hiba ({url}): {e}")
                    if not is_last:
                        await asyncio.sleep(2)
        return []

    @classmethod
    async def get_deep_tech(cls, plate, main_kw=None, vin=None):
        """Enrich one variant with RDW fuel and axle data, fetched in parallel.

        Args:
            plate: Dutch registration number; the US placeholder plate is
                returned with defaults and causes no lookup.
            main_kw: Raw power candidate from the discovery record, if any.
            vin: Accepted for interface compatibility; not used here.

        Returns:
            Dict with the keys ``kw``, ``fuel``, ``axles``, ``body``, ``euro``.
        """
        res = {"kw": cls.clean_kw(main_kw), "fuel": "Unknown", "axles": None, "body": "Standard", "euro": None}
        if plate == cls._US_PLACEHOLDER_PLATE:
            # No real registration behind it: RDW lookups could only miss.
            return res

        async with cls.semaphore:
            # --- 1. Dutch (RDW) enrichment ---
            fuel_data, axle_data = await asyncio.gather(
                cls.fetch_api(cls.RDW_FUEL, {"kenteken": plate}, headers=cls.HEADERS_RDW),
                cls.fetch_api(cls.RDW_AXLE, {"kenteken": plate}, headers=cls.HEADERS_RDW),
            )
            if fuel_data:
                f0 = fuel_data[0]
                if not res["kw"]:
                    res["kw"] = cls.clean_kw(f0.get("nettomaximumvermogen") or f0.get("netto_maximum_vermogen"))
                res["fuel"] = f0.get("brandstof_omschrijving", "Unknown")
                res["euro"] = f0.get("uitlaatemissieniveau")
            if axle_data:
                res["axles"] = cls.clean_int(axle_data[0].get("aantal_assen"))

            # --- 2. British (DVLA) check: disabled until an API key exists ---
            # NOTE: when enabling, POST {"registrationNumber": plate} to
            # UK_DVLA with HEADERS_UK and take only ``euroStatus``. DVLA's
            # ``engineCapacity`` is a displacement in cc, not a kW value.
            return res

    @classmethod
    async def discover_holland(cls, make_name, limit=1000):
        """Dutch discovery branch: collect unique variants (with a sample plate) from RDW."""
        offset, variants = 0, {}
        while True:
            params = {"merk": make_name.upper(), "$limit": limit, "$offset": offset}
            data = await cls.fetch_api(cls.RDW_MAIN, params, headers=cls.HEADERS_RDW)
            if not data:
                break
            for item in data:
                plate = item.get("kenteken")
                if not plate:
                    continue
                model = str(item.get("handelsbenaming", "Unknown")).upper()
                ccm = cls.clean_int(item.get("cilinderinhoud"))
                weight = cls.clean_int(item.get("massa_ledig_voertuig") or item.get("massa_rijklaar"))
                kw = item.get("netto_maximum_vermogen") or item.get("vermogen_massarijklaar")
                # A malformed date falls back to the default year instead of
                # aborting the whole discovery.
                raw_date = item.get("datum_eerste_toelating")
                parsed_year = cls.clean_int(str(raw_date)[:4]) if raw_date else None
                year = parsed_year if parsed_year is not None else 2024
                v_class = cls.CATEGORY_MAP.get(item.get("voertuigsoort"), "other")
                key = f"{model}-{ccm}-{weight}-{v_class}-{kw}-{year}"
                if key not in variants:
                    variants[key] = {
                        "model": model, "ccm": ccm, "weight": weight, "v_class": v_class,
                        "plate": plate, "main_kw": kw, "prod_year": year, "vin": item.get("vin")
                    }
            if len(data) < limit:
                break
            offset += limit
        return variants

    @classmethod
    async def discover_usa_batch(cls, make_name):
        """American NHTSA batch discovery: model names for the last six model years."""
        variants = {}
        current_year = datetime.datetime.now().year
        years = range(current_year - 5, current_year + 1)

        async def fetch_year(year):
            url = cls.US_BATCH.format(make=make_name.upper(), year=year)
            logger.info(f"🇺🇸 USA Batch Discovery indítása: {make_name} ({year})")
            data = await cls.fetch_api(url)
            if isinstance(data, dict) and "Results" in data:
                for m in data["Results"] or []:
                    # The key may be present with a null value.
                    m_name = str(m.get("Model_Name") or "Unknown").upper()
                    key = f"US-{m_name}-{year}"
                    if key not in variants:
                        variants[key] = {
                            "model": m_name, "ccm": None, "weight": None, "v_class": "car",
                            "plate": cls._US_PLACEHOLDER_PLATE, "main_kw": None, "prod_year": year, "vin": None
                        }

        await asyncio.gather(*(fetch_year(y) for y in years))
        return variants

    @classmethod
    async def process_make(cls, db, task_id, make_name):
        """Discover, enrich and store all variants of one make.

        Args:
            db: Open async DB session; all rows are committed together.
            task_id: ``data.catalog_discovery`` row to mark ``processed``.
            make_name: Make to discover.
        """
        logger.info(f"🚀 >>> {make_name} Powerhouse v1.4.1 INDUL...")

        # Parallel discovery; Dutch entries win on a key clash.
        holland_variants, usa_variants = await asyncio.gather(
            cls.discover_holland(make_name),
            cls.discover_usa_batch(make_name),
        )
        all_variants = {**usa_variants, **holland_variants}
        logger.info(f"📊 Összefésült variánsok száma: {len(all_variants)}")

        async def enrich_and_save(v):
            try:
                deep = await cls.get_deep_tech(v["plate"], main_kw=v["main_kw"], vin=v["vin"])
                return AssetCatalog(
                    make=make_name.upper(), model=v["model"], vehicle_class=v["v_class"],
                    fuel_type=deep["fuel"], power_kw=deep["kw"], engine_capacity=v["ccm"],
                    max_weight_kg=v["weight"], axle_count=deep["axles"], body_type=deep["body"],
                    year_from=v["prod_year"], euro_class=deep["euro"],
                    factory_data={
                        "source": "Powerhouse-v1.4.1",
                        "discovery_nl": v["plate"] != cls._US_PLACEHOLDER_PLATE,
                        "enriched_at": str(datetime.datetime.now())
                    }
                )
            except Exception as e:
                # Skip the variant, but leave a trace of what was dropped.
                logger.warning(f"Variant skipped ({v['model']}, plate {v['plate']}): {e}")
                return None

        # Parallel enrichment (bounded by the semaphore).
        results = await asyncio.gather(*(enrich_and_save(v) for v in all_variants.values()))
        items = [item for item in results if item is not None]
        db.add_all(items)
        total_saved = len(items)
        await db.commit()

        await db.execute(text("UPDATE data.catalog_discovery SET status = 'processed' WHERE id = :id"), {"id": task_id})
        await db.commit()
        logger.info(f"🏁 {make_name} KÉSZ. {total_saved} egyedi rekord rögzítve.")

    @classmethod
    async def run(cls):
        """Claim pending discovery tasks one by one and process them forever."""
        logger.info("🤖 Robot 1.4.1 (Powerhouse) ONLINE - Multi-Worker Safe Mode")
        while True:
            async with SessionLocal() as db:
                # SKIP LOCKED lets parallel workers pick different tasks.
                query = text("""
                    SELECT id, make FROM data.catalog_discovery
                    WHERE status = 'pending'
                    LIMIT 1
                    FOR UPDATE SKIP LOCKED
                """)
                res = await db.execute(query)
                task = res.fetchone()
                if task:
                    task_id, make_name = task
                    # Claim the task before the row lock ends with the commit.
                    await db.execute(
                        text("UPDATE data.catalog_discovery SET status = 'running' WHERE id = :id"),
                        {"id": task_id}
                    )
                    await db.commit()
                    await cls.process_make(db, task_id, make_name)
                else:
                    logger.info("😴 Várólista üres vagy minden feladat foglalt. Alvás 60mp...")
                    await asyncio.sleep(60)
            await asyncio.sleep(1)
# Script entry point: run the Powerhouse task loop under asyncio.
if __name__ == "__main__":
    asyncio.run(CatalogMaster.run())

View File

@@ -0,0 +1,272 @@
import asyncio
import httpx
import logging
import json
import os
import datetime
from sqlalchemy import text
from app.db.session import SessionLocal
from app.models.asset import AssetCatalog
# Root logging at INFO with the default format; module logger named after the robot version.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger("Robot-v1.4-Powerhouse")
class CatalogMaster:
"""
Master Hunter Robot v1.4 - Powerhouse Edition
- Párhuzamos Holland (RDW) és Amerikai (NHTSA Batch) Discovery.
- Előkészített, kikommentelt Brit (DVLA) integráció.
- Async Semaphore: Párhuzamos technikai dúsítás (egyszerre 10 szálon).
- Intelligens összefésülés a globális források között.
"""
# API Végpontok
RDW_MAIN = "https://opendata.rdw.nl/resource/m9d7-ebf2.json"
RDW_FUEL = "https://opendata.rdw.nl/resource/8ys7-d773.json"
RDW_AXLE = "https://opendata.rdw.nl/resource/3huj-srit.json"
RDW_BODY = "https://opendata.rdw.nl/resource/vezc-m2t6.json"
# AMERIKAI BATCH API: Egyetlen hívással az összes modell évjárat szerint
US_BATCH = "https://vpic.nhtsa.dot.gov/api/vehicles/GetModelsForMakeYear/make/{make}/modelyear/{year}?format=json"
# BRIT API (Kikapcsolva a tokenig)
# UK_DVLA = "https://driver-vehicle-licensing.api.gov.uk/vehicle-enquiry/v1/vehicles"
RDW_TOKEN = os.getenv("RDW_APP_TOKEN")
UK_API_KEY = os.getenv("UK_DVLA_API_KEY") # Jövőbeli token helye
HEADERS_RDW = {"X-App-Token": RDW_TOKEN} if RDW_TOKEN else {}
# HEADERS_UK = {"x-api-key": UK_API_KEY, "Content-Type": "application/json"} if UK_API_KEY else {}
CATEGORY_MAP = {
"Personenauto": "car",
"Motorfiets": "motorcycle",
"Bedrijfsauto": "truck",
"Vrachtwagen": "truck",
"Opleggertrekker": "truck",
"Bus": "bus",
"Aanhangwagen": "trailer",
"Oplegger": "trailer",
"Landbouw- of bosbouwtrekker": "agricultural",
"camper": "camper"
}
# Throttle parallel enrichment so we do not get banned (at most 5 concurrent lookups, matching the Semaphore below)
semaphore = asyncio.Semaphore(5)
@classmethod
def clean_kw(cls, val):
try:
if val is None: return None
f_val = float(str(val).replace(',', '.'))
if 0 < f_val < 1.0: return None
v = int(f_val)
return v if v > 0 else None
except (ValueError, TypeError):
return None
@classmethod
def clean_int(cls, val):
try:
if val is None: return None
return int(float(str(val).replace(',', '.')))
except (ValueError, TypeError):
return None
@classmethod
async def fetch_api(cls, url, params=None, headers=None, method="GET", json_data=None):
async with httpx.AsyncClient(headers=headers, follow_redirects=True) as client:
for attempt in range(3): # 3-szor próbáljuk újra, ha kell
try:
if method == "POST":
resp = await client.post(url, json=json_data, timeout=30)
else:
resp = await client.get(url, params=params, timeout=30)
if resp.status_code == 429: # HOPPÁ, túl gyorsak vagyunk!
wait_time = (attempt + 1) * 5 # Egyre többet vár: 5s, 10s...
logger.warning(f"⚠️ RDW limit elérve! Pihenő {wait_time} mp...")
await asyncio.sleep(wait_time)
continue
return resp.json() if resp.status_code in [200, 201] else []
except Exception as e:
logger.error(f"❌ API Hiba ({url}): {e}")
await asyncio.sleep(2)
return []
@classmethod
async def get_deep_tech(cls, plate, main_kw=None, vin=None):
"""Mély dúsítás párhuzamos forrásokból."""
async with cls.semaphore:
res = {"kw": cls.clean_kw(main_kw), "fuel": "Unknown", "axles": None, "body": "Standard", "euro": None}
# --- 1. HOLLAND (RDW) DÚSÍTÁS ---
fuel_task = cls.fetch_api(cls.RDW_FUEL, {"kenteken": plate}, headers=cls.HEADERS_RDW)
axle_task = cls.fetch_api(cls.RDW_AXLE, {"kenteken": plate}, headers=cls.HEADERS_RDW)
# Holland adatok párhuzamos lekérése
fuel_data, axle_data = await asyncio.gather(fuel_task, axle_task)
if fuel_data:
f0 = fuel_data[0]
if not res["kw"]:
res["kw"] = cls.clean_kw(f0.get("nettomaximumvermogen") or f0.get("netto_maximum_vermogen"))
res["fuel"] = f0.get("brandstof_omschrijving", "Unknown")
res["euro"] = f0.get("uitlaatemissieniveau")
if axle_data:
res["axles"] = cls.clean_int(axle_data[0].get("aantal_assen"))
# --- 2. BRIT (DVLA) ELLENŐRZÉS (KIKOMMENTELVE A TOKENIG) ---
"""
if cls.UK_API_KEY and (not res["kw"] or not res["euro"]):
uk_data = await cls.fetch_api(cls.UK_DVLA, method="POST",
json_data={"registrationNumber": plate},
headers=cls.HEADERS_UK)
if uk_data and not isinstance(uk_data, list):
res["kw"] = res["kw"] or cls.clean_kw(uk_data.get("engineCapacity"))
res["euro"] = res["euro"] or uk_data.get("euroStatus")
"""
return res
@classmethod
async def discover_holland(cls, make_name, limit=1000):
"""Holland Discovery ág."""
offset, variants = 0, {}
while True:
params = {"merk": make_name.upper(), "$limit": limit, "$offset": offset}
data = await cls.fetch_api(cls.RDW_MAIN, params, headers=cls.HEADERS_RDW)
if not data: break
for item in data:
plate = item.get("kenteken")
if not plate: continue
model = str(item.get("handelsbenaming", "Unknown")).upper()
ccm = cls.clean_int(item.get("cilinderinhoud"))
weight = cls.clean_int(item.get("massa_ledig_voertuig") or item.get("massa_rijklaar"))
kw = item.get("netto_maximum_vermogen") or item.get("vermogen_massarijklaar")
raw_date = item.get("datum_eerste_toelating")
year = int(str(raw_date)[:4]) if raw_date else 2024
v_class = cls.CATEGORY_MAP.get(item.get("voertuigsoort"), "other")
key = f"{model}-{ccm}-{weight}-{v_class}-{kw}-{year}"
if key not in variants:
variants[key] = {
"model": model, "ccm": ccm, "weight": weight, "v_class": v_class,
"plate": plate, "main_kw": kw, "prod_year": year, "vin": item.get("vin")
}
if len(data) < limit: break
offset += limit
return variants
@classmethod
async def discover_usa_batch(cls, make_name):
"""Amerikai NHTSA Batch Discovery ág (2020-2025 évjáratokra)."""
variants = {}
# Az utolsó 5 évjáratot nézzük a legfrissebb modellekért
years = range(datetime.datetime.now().year - 5, datetime.datetime.now().year + 1)
async def fetch_year(year):
url = cls.US_BATCH.format(make=make_name.upper(), year=year)
data = await cls.fetch_api(url)
if data and "Results" in data:
for m in data["Results"]:
m_name = m.get("Model_Name", "Unknown").upper()
# US adatoknál nincs rendszám, de a Robot 2 dúsítani fogja ha kell
key = f"US-{m_name}-{year}"
variants[key] = {
"model": m_name, "ccm": None, "weight": None, "v_class": "car",
"plate": "US-DISCOVERY", "main_kw": None, "prod_year": year, "vin": None
}
await asyncio.gather(*(fetch_year(y) for y in years))
return variants
@classmethod
async def process_make(cls, db, task_id, make_name):
logger.info(f"🚀 >>> {make_name} Powerhouse v1.4 INDUL...")
# PÁRHUZAMOS DISCOVERY: Holland és USA egyszerre
holland_task = cls.discover_holland(make_name)
usa_task = cls.discover_usa_batch(make_name)
holland_variants, usa_variants = await asyncio.gather(holland_task, usa_task)
# Összefésülés (Holland élvez elsőbbséget a rendszám miatt)
all_variants = {**usa_variants, **holland_variants}
logger.info(f"📊 Összesen {len(all_variants)} egyedi variáns (NL: {len(holland_variants)}, US: {len(usa_variants)})")
# PÁRHUZAMOS DÚSÍTÁS
async def enrich_and_save(v):
deep = await cls.get_deep_tech(v["plate"], main_kw=v["main_kw"], vin=v["vin"])
try:
db_item = AssetCatalog(
make=make_name.upper(), model=v["model"], vehicle_class=v["v_class"],
fuel_type=deep["fuel"], power_kw=deep["kw"], engine_capacity=v["ccm"],
max_weight_kg=v["weight"], axle_count=deep["axles"], body_type=deep["body"],
year_from=v["prod_year"], euro_class=deep["euro"],
factory_data={
"source": "Powerhouse-v1.4",
"discovery_nl": v["plate"] != "US-DISCOVERY",
"enriched_at": str(datetime.datetime.now())
}
)
return db_item
except Exception:
return None
# Egyszerre indítjuk a dúsításokat (A semaphore korlátozza a szálakat)
results = await asyncio.gather(*(enrich_and_save(v) for v in all_variants.values()))
# Mentés
total_saved = 0
for item in results:
if item:
db.add(item)
total_saved += 1
await db.commit()
await db.execute(text("UPDATE data.catalog_discovery SET status = 'processed' WHERE id = :id"), {"id": task_id})
await db.commit()
logger.info(f"🏁 {make_name} KÉSZ. {total_saved} rekord rögzítve.")
@classmethod
async def run(cls):
logger.info("🤖 Robot 1.4 (Powerhouse) ONLINE - Multi-Worker Safe")
while True:
async with SessionLocal() as db:
# 1. 'FOR UPDATE SKIP LOCKED' - Megfogjuk a sort és lelakatoljuk,
# de a többi robot átugorja, amit mi már fogunk.
query = text("""
SELECT id, make FROM data.catalog_discovery
WHERE status = 'pending'
LIMIT 1
FOR UPDATE SKIP LOCKED
""")
res = await db.execute(query)
task = res.fetchone()
if task:
task_id, make_name = task
# 2. Azonnal átállítjuk 'running'-ra a tranzakción belül,
# így senki más nem nyúl hozzá.
await db.execute(
text("UPDATE data.catalog_discovery SET status = 'running' WHERE id = :id"),
{"id": task_id}
)
await db.commit() # Itt véglegesítjük a foglalást
# 3. Indulhat a tényleges munka
await cls.process_make(db, task_id, make_name)
else:
logger.info("😴 Várólista üres (vagy minden sor foglalt). Alvás 60 mp...")
await asyncio.sleep(60)
await asyncio.sleep(1)
if __name__ == "__main__":
    # Script entry point: start the Powerhouse worker loop.
    asyncio.run(CatalogMaster.run())

View File

@@ -0,0 +1,117 @@
import asyncio
import logging
import warnings
import os
from sqlalchemy import select, update, and_, func, or_, case # Explicit case import
from app.db.session import SessionLocal
from app.models.vehicle_definitions import VehicleModelDefinition
import httpx
# 1. KRITIKUS JAVÍTÁS: A figyelmeztetések globális elnyomása az import előtt
warnings.filterwarnings("ignore", category=RuntimeWarning, module='duckduckgo_search')
from duckduckgo_search import DDGS
# Logolás beállítása, hogy lássuk a haladást
logging.basicConfig(level=logging.INFO, format='%(asctime)s [%(levelname)s] %(name)s: %(message)s')
logger = logging.getLogger("Robot-Researcher-v2.1")
class ResearcherBot:
    """Robot 2.1 (Researcher): gathers raw web context for vehicle definitions.

    For every record in status ``unverified`` or ``awaiting_research`` it runs
    a fixed set of DuckDuckGo queries, stores the concatenated snippets in
    ``raw_search_context`` and moves the record to ``awaiting_ai_synthesis``.
    """

    def __init__(self):
        self.batch_size = 15            # records picked up per loop iteration
        self.max_parallel_queries = 5   # concurrent DuckDuckGo searches

    def _search_slots(self):
        """Return the semaphore enforcing ``max_parallel_queries`` (created on first use)."""
        slots = getattr(self, "_query_slots", None)
        if slots is None:
            slots = self._query_slots = asyncio.Semaphore(self.max_parallel_queries)
        return slots

    async def fetch_source(self, label, query):
        """Run one DuckDuckGo text search and format the snippets as a labelled section.

        Never raises: a failed search is logged and returned as an ERROR
        section so the remaining sources can still be stored.
        """
        def search():
            # DDGS is synchronous; it is executed in a worker thread below.
            with DDGS() as ddgs:
                results = ddgs.text(query, max_results=3)
                return [r['body'] for r in results] if results else []

        try:
            # A batch spawns batch_size x 4 searches; let only a few run at once.
            async with self._search_slots():
                results = await asyncio.to_thread(search)
            if not results:
                return f"=== SOURCE: {label} | NO DATA FOUND ===\n\n"
            content = f"=== SOURCE: {label} | QUERY: {query} ===\n"
            content += "\n---\n".join(results)
            content += "\n=== END SOURCE ===\n\n"
            return content
        except Exception as e:
            logger.error(f"❌ Keresési hiba ({label}): {e}")
            return f"=== SOURCE: {label} ERROR: {str(e)} ===\n\n"

    async def research_vehicle(self, vehicle_id):
        """Research one vehicle definition and hand it over to the AI synthesis stage."""
        async with SessionLocal() as db:
            res = await db.execute(select(VehicleModelDefinition).where(VehicleModelDefinition.id == vehicle_id))
            v = res.scalar_one_or_none()
            if not v:
                return
            make, model = v.make, v.marketing_name
            # Flag the record as being researched.
            await db.execute(
                update(VehicleModelDefinition)
                .where(VehicleModelDefinition.id == vehicle_id)
                .values(status='research_in_progress')
            )
            await db.commit()

        logger.info(f"🔎 Kutatás indul: {make} {model}")
        queries = [
            ("TECH_SPECS", f"{make} {model} technical specifications engine power"),
            ("MAINTENANCE", f"{make} {model} service manual oil capacity spark plug"),
            ("TIRES_BRAKES", f"{make} {model} tire size brake pad type"),
            ("FLUIDS", f"{make} {model} coolant quantity transmission oil")
        ]
        search_results = await asyncio.gather(*(self.fetch_source(label, q) for label, q in queries))
        full_context = "".join(search_results)

        async with SessionLocal() as db:
            await db.execute(
                update(VehicleModelDefinition)
                .where(VehicleModelDefinition.id == vehicle_id)
                .values(
                    raw_search_context=full_context,
                    status='awaiting_ai_synthesis',  # picked up by the synthesis robot
                    updated_at=func.now()
                )
            )
            await db.commit()
        logger.info(f"✅ Kutatás kész, adat a tartályban: {make} {model}")

    async def run(self):
        """Main loop: fetch a prioritised batch of ids and research them concurrently."""
        logger.info("🚀 Robot 2.1 (Researcher) ONLINE")
        while True:
            async with SessionLocal() as db:
                # Explicit case() construct (func.case does not accept else_).
                # Lower number = processed earlier.
                priorities = case(
                    (and_(VehicleModelDefinition.vehicle_type == 'car',
                          VehicleModelDefinition.make.in_(['SUZUKI', 'TOYOTA', 'SKODA', 'VOLKSWAGEN', 'OPEL'])), 1),
                    (VehicleModelDefinition.vehicle_type == 'car', 2),
                    (and_(VehicleModelDefinition.vehicle_type == 'motorcycle',
                          VehicleModelDefinition.make.in_(['HONDA', 'YAMAHA', 'SUZUKI', 'KAWASAKI'])), 3),
                    else_=4
                )
                stmt = select(VehicleModelDefinition.id).where(
                    or_(VehicleModelDefinition.status == 'unverified',
                        VehicleModelDefinition.status == 'awaiting_research')
                ).order_by(priorities).limit(self.batch_size)
                res = await db.execute(stmt)
                ids = [r[0] for r in res.fetchall()]

            if not ids:
                logger.info("💤 Nincs több feldolgozandó feladat, pihenés...")
                await asyncio.sleep(60)
                continue

            # Process the batch concurrently; one failing record must neither
            # cancel its siblings nor terminate the robot.
            outcomes = await asyncio.gather(
                *(self.research_vehicle(rid) for rid in ids),
                return_exceptions=True,
            )
            for rid, outcome in zip(ids, outcomes):
                if isinstance(outcome, Exception):
                    logger.error("Research failed for record %s: %r", rid, outcome)
            # Short pause to go easy on the search engine.
            await asyncio.sleep(2)
if __name__ == "__main__":
    # Script entry point: start the researcher loop.
    asyncio.run(ResearcherBot().run())

View File

@@ -0,0 +1,83 @@
import asyncio
import httpx
import logging
import os
from sqlalchemy import text
from app.db.session import SessionLocal
logging.basicConfig(level=logging.INFO, format='%(asctime)s [%(levelname)s]: %(message)s')
logger = logging.getLogger("Robot-0-Strategist")
class Robot0Strategist:
    """Robot 0 (Strategist): seeds the discovery queue with makes ordered by popularity.

    For each vehicle category it asks the RDW open-data API for the most
    frequent makes and upserts one ``ALL_MODELS`` row per make into
    ``data.catalog_discovery``.
    """

    RDW_API = "https://opendata.rdw.nl/resource/m9d7-ebf2.json"
    RDW_TOKEN = os.getenv("RDW_APP_TOKEN")
    HEADERS = {"X-App-Token": RDW_TOKEN} if RDW_TOKEN else {}

    # Dutch vehicle types mapped to our categories, in processing order.
    # 'Bedrijfsauto' is the spelling the catalog robot's category map uses for
    # the same RDW field; the previous 'Bedrijfswagen' did not agree with it.
    CATEGORIES = [
        {"name": "car", "rdw_types": ["'Personenauto'"]},
        {"name": "motorcycle", "rdw_types": ["'Motorfiets'"]},
        {"name": "truck", "rdw_types": ["'Bedrijfsauto'", "'Vrachtwagen'", "'Opleggertrekker'"]},
        {"name": "other", "rdw_types": ["NOT IN ('Personenauto', 'Motorfiets', 'Bedrijfsauto', 'Vrachtwagen', 'Opleggertrekker')"]}
    ]

    @staticmethod
    def _build_type_filter(rdw_types):
        """Build the ``$where`` clause for a category.

        A first entry containing ``NOT IN`` is used verbatim as the predicate
        (the catch-all category); otherwise the quoted type names are OR-ed.
        """
        if rdw_types and "NOT IN" in rdw_types[0]:
            return f"voertuigsoort {rdw_types[0]}"
        return " OR ".join(f"voertuigsoort = {t}" for t in rdw_types)

    async def get_popular_makes(self, vehicle_class, rdw_types):
        """Return the RDW rows (``merk`` plus count) of the most common makes in a category.

        Returns an empty list on any HTTP or transport error.
        """
        params = {
            "$select": "merk, count(*)",
            "$where": self._build_type_filter(rdw_types),
            "$group": "merk",
            "$order": "count DESC",
            "$limit": 500  # the top 500 makes per category are plenty
        }
        async with httpx.AsyncClient(timeout=30) as client:
            try:
                resp = await client.get(self.RDW_API, params=params, headers=self.HEADERS)
                if resp.status_code == 200:
                    return resp.json()
                logger.warning("RDW answered HTTP %s for category %s", resp.status_code, vehicle_class)
                return []
            except Exception as e:
                logger.error(f"❌ Hiba a {vehicle_class} lekérdezésekor: {e}")
                return []

    async def run(self):
        """Queue the popular makes of every category, most popular first."""
        logger.info("🚀 Robot 0 (Strategist) INDUL - Piaci alapú sorrend felállítása...")
        async with SessionLocal() as db:
            # Optional clean start: wipe the currently pending queue first.
            # await db.execute(text("DELETE FROM data.catalog_discovery WHERE status = 'pending'"))
            for category in self.CATEGORIES:
                v_class = category["name"]
                logger.info(f"📊 {v_class.upper()} kategória elemzése...")
                makes = await self.get_popular_makes(v_class, category["rdw_types"])
                added_count = 0
                for item in makes:
                    make_name = item.get("merk")
                    if not make_name:
                        continue
                    # Insert into the discovery table. Priority is implied by
                    # insertion (id) order, which the hunter robot follows.
                    await db.execute(text("""
                        INSERT INTO data.catalog_discovery (make, model, vehicle_class, status, source)
                        VALUES (:make, 'ALL_MODELS', :class, 'pending', 'ROBOT-0-POPULARITY')
                        ON CONFLICT (make, model, vehicle_class) DO UPDATE
                        SET status = 'pending' WHERE catalog_discovery.status != 'processed'
                    """), {"make": make_name.upper(), "class": v_class})
                    added_count += 1
                await db.commit()
                logger.info(f"✅ {v_class.upper()}: {added_count} márka sorba állítva a népszerűség alapján.")
        logger.info("🏁 Robot 0 végzett. A Discovery tábla készen áll a Robot 1 (Hunter) számára!")
if __name__ == "__main__":
    # Script entry point: run the strategist once and exit.
    asyncio.run(Robot0Strategist().run())

View File

@@ -2,159 +2,159 @@ import asyncio
import httpx
import logging
import os
import hashlib
from datetime import datetime, timezone
from sqlalchemy.ext.asyncio import AsyncSession
from sqlalchemy import select, text
from sqlalchemy import select, text, update
from app.db.session import SessionLocal
# Modellek - Az új v1.3 struktúra
from app.models.service import ServiceStaging, DiscoveryParameter
# Naplózás
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
logger = logging.getLogger("Robot-v1.3-ContinentalScout")
logger = logging.getLogger("Robot-v1.3.1-ContinentalScout")
class ServiceHunter:
    """Robot v1.3.1: Continental Scout (Grid Search Edition).

    - Walks a grid over each city's bounding box to cover dense areas.
    - De-duplicates places by fingerprint.
    - Tasks (city, country, keyword) come from ``DiscoveryParameter`` rows.
    """

    OVERPASS_URL = "http://overpass-api.de/api/interpreter"
    PLACES_NEW_URL = "https://places.googleapis.com/v1/places:searchNearby"
    GEOCODE_URL = "https://maps.googleapis.com/maps/api/geocode/json"
    GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")

    # Grid step in degrees between two neighbouring search circles.
    GRID_LAT_STEP = 0.015
    GRID_LON_STEP = 0.02
    # Search radius per grid point, in metres (circles overlap for coverage).
    SEARCH_RADIUS_M = 1500.0

    @classmethod
    async def get_coordinates(cls, city, country_code):
        """Geocode a city centre via the Google Geocoding API.

        Kept for callers that still use it; the grid search does not.
        Returns ``(lat, lng)`` or ``(None, None)`` when nothing was found.
        """
        params = {"address": f"{city}, {country_code}", "key": cls.GOOGLE_API_KEY}
        async with httpx.AsyncClient() as client:
            resp = await client.get(cls.GEOCODE_URL, params=params)
            if resp.status_code == 200:
                results = resp.json().get("results")
                if results:
                    loc = results[0]["geometry"]["location"]
                    return loc["lat"], loc["lng"]
        return None, None

    @classmethod
    def generate_fingerprint(cls, name: str, city: str, street: str) -> str:
        """Build the de-duplication fingerprint of a place.

        MD5 hex digest of ``name|city|first five characters of street``, all
        lower-cased. Used as an identifier only, not for security.
        """
        raw_string = f"{str(name).lower()}|{str(city).lower()}|{str(street).lower()[:5]}"
        return hashlib.md5(raw_string.encode()).hexdigest()

    @classmethod
    async def get_city_bounds(cls, city, country_code):
        """Look up a city's bounding box with Nominatim.

        Returns ``[min_lat, max_lat, min_lon, max_lon]`` as floats, or ``None``
        when the city is unknown or the response carries no usable box.
        """
        url = "https://nominatim.openstreetmap.org/search"
        params = {"city": city, "country": country_code, "format": "json"}
        async with httpx.AsyncClient(headers={"User-Agent": "ServiceFinder-Scout/1.0"}) as client:
            resp = await client.get(url, params=params)
        if resp.status_code != 200:
            return None
        hits = resp.json()
        if not hits:
            return None
        bbox = hits[0].get("boundingbox")  # [min_lat, max_lat, min_lon, max_lon]
        if not bbox or len(bbox) != 4:
            return None
        try:
            return [float(x) for x in bbox]
        except (TypeError, ValueError):
            return None

    @classmethod
    async def run_grid_search(cls, db, task):
        """Scan the task's city cell by cell and stage every place found.

        Args:
            db: open async session handed on to ``save_to_staging``.
            task: discovery parameter row; ``city``, ``country_code`` and
                ``keyword`` are read from it.
        """
        bbox = await cls.get_city_bounds(task.city, task.country_code)
        if not bbox:
            return
        min_lat, max_lat, min_lon, max_lon = bbox
        curr_lat = min_lat
        while curr_lat < max_lat:
            curr_lon = min_lon
            while curr_lon < max_lon:
                logger.info(f"🛰️ Rács-cella pásztázása: {curr_lat}, {curr_lon} - Kulcsszó: {task.keyword}")
                places = await cls.get_google_places(curr_lat, curr_lon, task.keyword)
                for p in places:
                    name = p.get('displayName', {}).get('text')
                    if not name:
                        # Without a name the fingerprint degenerates to
                        # "none|city|..." and unrelated places would merge.
                        continue
                    full_addr = p.get('formattedAddress', '')
                    f_print = cls.generate_fingerprint(name, task.city, full_addr)
                    await cls.save_to_staging(db, {
                        "external_id": p.get('id'),
                        "name": name,
                        "full_address": full_addr,
                        "phone": p.get('internationalPhoneNumber'),
                        "website": p.get('websiteUri'),
                        "fingerprint": f_print,
                        "city": task.city,
                        "source": "google",
                        "raw": p,
                        "trust": 30
                    })
                curr_lon += cls.GRID_LON_STEP
                await asyncio.sleep(0.5)  # API protection
            curr_lat += cls.GRID_LAT_STEP

    @classmethod
    async def get_google_places(cls, lat, lon, keyword):
        """Query Google Places (New) ``searchNearby`` around one grid point.

        ``keyword`` is accepted for interface compatibility; the request
        filters by place type instead. Returns the ``places`` list, or an
        empty list when no API key is configured or the call fails.
        """
        if not cls.GOOGLE_API_KEY:
            return []
        headers = {
            "Content-Type": "application/json",
            "X-Goog-Api-Key": cls.GOOGLE_API_KEY,
            "X-Goog-FieldMask": "places.displayName,places.id,places.internationalPhoneNumber,places.websiteUri,places.formattedAddress"
        }
        payload = {
            "includedTypes": ["car_repair", "motorcycle_repair"],
            "maxResultCount": 20,
            "locationRestriction": {
                "circle": {
                    "center": {"latitude": lat, "longitude": lon},
                    "radius": cls.SEARCH_RADIUS_M
                }
            }
        }
        async with httpx.AsyncClient() as client:
            resp = await client.post(cls.PLACES_NEW_URL, json=payload, headers=headers)
        if resp.status_code == 200:
            return resp.json().get("places", [])
        # Keep failures visible; an empty grid would otherwise look like "no places".
        logger.error(f"❌ Google API hiba ({resp.status_code}): {resp.text}")
        return []

    @classmethod
    async def save_to_staging(cls, db: "AsyncSession", data: dict):
        """Insert a place into staging unless its fingerprint already exists.

        An existing fingerprint only gets its trust score raised by 5. ``data``
        must contain ``fingerprint``, ``name``, ``source`` and ``external_id``;
        the remaining keys are optional.
        """
        stmt = select(ServiceStaging).where(ServiceStaging.fingerprint == data['fingerprint'])
        # first() instead of scalar_one_or_none(): older duplicate rows must
        # not make the lookup raise.
        existing = (await db.execute(stmt)).scalars().first()
        if existing:
            existing.trust_score = (existing.trust_score or 0) + 5
            return
        new_entry = ServiceStaging(
            name=data['name'],
            source=data['source'],
            external_id=str(data['external_id']),
            fingerprint=data['fingerprint'],
            city=data.get('city'),
            full_address=data.get('full_address'),
            contact_phone=data.get('phone'),
            website=data.get('website'),
            raw_data=data.get('raw', {}),
            status="pending",
            trust_score=data.get('trust', 30)
        )
        db.add(new_entry)
        await db.flush()

    @classmethod
    async def run(cls):
        """Main loop: grid-search every active discovery task, then sleep for an hour."""
        logger.info("🤖 Continental Scout v1.3.1 - Grid Engine INDUL...")
        while True:
            async with SessionLocal() as db:
                try:
                    await db.execute(text("SET search_path TO data, public"))
                    # Load the active search parameters.
                    stmt = select(DiscoveryParameter).where(DiscoveryParameter.is_active == True)
                    tasks = (await db.execute(stmt)).scalars().all()
                    for task in tasks:
                        logger.info(f"🔎 Mélyfúrás indítása: {task.city} -> {task.keyword}")
                        await cls.run_grid_search(db, task)
                        task.last_run_at = datetime.now(timezone.utc)
                        await db.commit()
                        logger.info(f"✅ {task.city} felderítve.")
                except Exception as e:
                    logger.error(f"💥 Hiba: {e}")
                    await db.rollback()
            logger.info("😴 Minden aktív feladat kész. Alvás 1 órán át...")
            await asyncio.sleep(3600)
if __name__ == "__main__":

View File

@@ -3,113 +3,169 @@ import httpx
import logging
import os
import datetime
from sqlalchemy import select, and_
from sqlalchemy.exc import IntegrityError
import random
import sys
from sqlalchemy import select, and_, update, text, func
from sqlalchemy.ext.asyncio import AsyncSession
from app.db.session import SessionLocal
from app.models.vehicle_definitions import VehicleModelDefinition
from app.models.asset import AssetCatalog
from app.services.ai_service import AIService
from duckduckgo_search import DDGS
# --- Strict logging configuration ---
# force=True removes handlers installed earlier on the root logger (for
# example by imported modules) so this format is the one that applies.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s.%(msecs)03d [%(levelname)s] Alchemist: %(message)s',
    datefmt='%Y-%m-%d %H:%M:%S',
    stream=sys.stdout,
    force=True,
)
logger = logging.getLogger("Robot-Enricher-v1.3.0")
class TechEnricher:
    """Industrial TechEnricher v1.3.0.

    - Avoids deadlocks by never holding a DB session during AI/web work.
    - Daily cap on AI calls, smart merge into the master catalog, web fallback.
    """

    # Legacy RDW lookup settings, used only by ``fetch_rdw_tech_data``.
    API_URL = "https://opendata.rdw.nl/resource/kyri-nuah.json"
    RDW_TOKEN = os.getenv("RDW_APP_TOKEN")
    HEADERS = {"X-App-Token": RDW_TOKEN} if RDW_TOKEN else {}

    def __init__(self):
        self.max_attempts = 5        # failures before a record is suspended
        self.batch_size = 15         # records fetched per loop iteration
        self.daily_ai_limit = 500    # AI calls allowed per calendar day
        self.ai_calls_today = 0
        self.last_reset_date = datetime.date.today()

    def check_budget(self) -> bool:
        """Return True while today's AI-call budget is not used up.

        The counter is reset the first time this is called on a new day.
        """
        today = datetime.date.today()
        if today > self.last_reset_date:
            self.ai_calls_today = 0
            self.last_reset_date = today
        return self.ai_calls_today < self.daily_ai_limit

    def is_data_sane(self, data: dict) -> bool:
        """Reject empty AI answers and implausible values (ccm > 15000 or kW > 2000)."""
        if not data:
            return False
        try:
            ccm = int(data.get("ccm", 0) or 0)
            kw = int(data.get("kw", 0) or 0)
        except (TypeError, ValueError, AttributeError):
            return False
        return not (ccm > 15000 or kw > 2000)

    async def get_web_wisdom(self, make: str, model: str) -> str:
        """Search the web in a worker thread (keeps the event loop free).

        Returns the joined result snippets, or an empty string on any error.
        """
        query = f"{make} {model} technical specs maintenance oil qty tire size"

        def sync_search():
            with DDGS() as ddgs:
                return "\n".join([r['body'] for r in ddgs.text(query, max_results=3)])

        try:
            return await asyncio.to_thread(sync_search)
        except Exception as e:
            logger.warning(f"🌐 Web hiba ({make}): {e}")
            return ""

    @classmethod
    async def fetch_rdw_tech_data(cls, make, model):
        """Legacy helper: first RDW record for a make/model, or ``None``.

        Not used by the current enrichment pipeline; kept for existing callers.
        """
        params = {"merk": make.upper(), "handelsbenaming": str(model).strip().upper(), "$limit": 1}
        async with httpx.AsyncClient(headers=cls.HEADERS) as client:
            try:
                resp = await client.get(cls.API_URL, params=params, timeout=15)
                return resp.json()[0] if resp.status_code == 200 and resp.json() else None
            except Exception:
                return None

    async def process_single_record(self, record_id: int):
        """Enrich one record in three strictly separated steps.

        1. Read the record and close the session.
        2. AI work (no session open), with a web-assisted retry.
        3. Save in a fresh session.

        Any failure in steps 2-3 increments ``attempts``, stores the error and
        suspends the record once ``max_attempts`` failures are reached.
        """
        # --- Step 1: read ---
        async with SessionLocal() as db:
            stmt = select(VehicleModelDefinition).where(VehicleModelDefinition.id == record_id)
            rec = (await db.execute(stmt)).scalar_one_or_none()
            if not rec:
                return
            make, m_name, v_type = rec.make, rec.marketing_name, (rec.vehicle_type or "car")
        logger.info(f"🧪 >>> Dúsítás indítása: {make} {m_name}")

        # --- Step 2: AI work (no DB session is open here) ---
        try:
            # Every AI call is charged to the budget when it is made, so
            # failed or repeated calls count as well.
            self.ai_calls_today += 1
            ai_data = await AIService.get_clean_vehicle_data(make, m_name, v_type, {})
            if not ai_data or not ai_data.get("kw"):
                logger.info(f"🔍 AI bizonytalan, webes dúsítás indul: {make} {m_name}")
                web_info = await self.get_web_wisdom(make, m_name)
                self.ai_calls_today += 1
                ai_data = await AIService.get_clean_vehicle_data(make, m_name, v_type, {"web_context": web_info})
            if not ai_data:
                raise ValueError("Az AI nem adott értékelhető választ.")
            if not self.is_data_sane(ai_data):
                raise ValueError("AI answer failed the sanity check (ccm/kw out of range or not numeric).")

            # The key may be present with a null value; fall back to our name.
            model_name = (ai_data.get("marketing_name") or m_name)[:50]

            # --- Step 3: save (fresh session) ---
            async with SessionLocal() as db:
                # Master catalog smart merge: insert only if no equal row exists.
                cat_stmt = select(AssetCatalog).where(and_(
                    AssetCatalog.make == make.upper(),
                    AssetCatalog.model == model_name,
                    AssetCatalog.power_kw == ai_data.get("kw")
                )).limit(1)
                if not (await db.execute(cat_stmt)).scalar_one_or_none():
                    db.add(AssetCatalog(
                        make=make.upper(),
                        model=model_name,
                        power_kw=ai_data.get("kw"),
                        engine_capacity=ai_data.get("ccm"),
                        factory_data=ai_data
                    ))
                    logger.info(f"✅ Mentve az MDM-be: {make} {m_name}")
                # Update the staging record.
                await db.execute(
                    update(VehicleModelDefinition)
                    .where(VehicleModelDefinition.id == record_id)
                    .values(
                        status="ai_enriched",
                        technical_code=ai_data.get("technical_code") or f"GEN-{record_id}",
                        engine_capacity=ai_data.get("ccm"),
                        power_kw=ai_data.get("kw"),
                        updated_at=func.now()
                    )
                )
                await db.commit()
        except Exception as e:
            logger.error(f"🚨 Hiba a(z) {record_id} rekordnál: {e}")
            # The CASE reads the pre-update counter, so "max_attempts - 1"
            # suspends the record on its max_attempts-th failure.
            suspend_at = int(self.max_attempts) - 1
            async with SessionLocal() as db:
                await db.execute(update(VehicleModelDefinition).where(VehicleModelDefinition.id == record_id).values(
                    attempts=VehicleModelDefinition.attempts + 1,
                    last_error=str(e)[:200],
                    status=text(f"CASE WHEN attempts >= {suspend_at} THEN 'suspended' ELSE 'unverified' END"),
                    updated_at=func.now()
                ))
                await db.commit()

    async def run(self):
        """Main loop: enrich unverified records batch by batch within the daily budget."""
        logger.info(f"🚀 Robot 2 v1.3.0 ONLINE (Limit: {self.daily_ai_limit})")
        while True:
            if not self.check_budget():
                await asyncio.sleep(3600)
                continue
            try:
                # Fetch ids only, so no session stays open during enrichment.
                async with SessionLocal() as db:
                    stmt = select(VehicleModelDefinition.id).where(and_(
                        VehicleModelDefinition.status == "unverified",
                        VehicleModelDefinition.attempts < self.max_attempts
                    )).limit(self.batch_size)
                    ids = [r[0] for r in (await db.execute(stmt)).fetchall()]
                if not ids:
                    await asyncio.sleep(60)
                    continue
                logger.info(f"📦 Batch indul: {len(ids)} rekord.")
                for rid in ids:
                    if not self.check_budget():
                        # Budget ran out mid-batch; the outer loop will wait.
                        break
                    await self.process_single_record(rid)
                    await asyncio.sleep(random.uniform(10.0, 30.0))  # go easy on the GPU
            except Exception as e:
                logger.error(f"🚨 Főciklus hiba: {e}")
                await asyncio.sleep(30)
if __name__ == "__main__":
    # run() is an instance method, so the robot has to be instantiated first.
    enricher = TechEnricher()
    asyncio.run(enricher.run())

View File

@@ -0,0 +1,64 @@
import asyncio
import logging
import sys
import datetime
from sqlalchemy import select, and_, text, update
from sqlalchemy.orm import joinedload
from app.db.session import SessionLocal
from app.models.asset import Asset, AssetCatalog
from app.services.ai_service import AIService
logging.basicConfig(level=logging.INFO, format='%(asctime)s [%(levelname)s] Auditor: %(message)s', stream=sys.stdout)
logger = logging.getLogger("VIN-Auditor-v1.3.0")
class VINAuditor:
    """VIN Auditor v1.3.0.

    Verifies assets by their VIN and keeps the master catalog in sync with
    the outcome.
    """

    # Minimum kW difference that counts as a different catalog variant.
    KW_TOLERANCE = 5

    @staticmethod
    def _needs_new_variant(real_kw, current_kw, tolerance=KW_TOLERANCE):
        """Return True when the audited power differs from the catalog value by ``tolerance`` kW or more."""
        return abs(real_kw - (current_kw or 0)) >= tolerance

    @classmethod
    async def audit_asset(cls, asset_id: int):
        """Audit one asset by VIN.

        Returns:
            True when the asset was marked verified, False when it was skipped
            (missing asset, VIN or catalog), the AI gave no power value, or an
            error occurred.
        """
        # 1. Collect the data, then close the session.
        async with SessionLocal() as db:
            stmt = select(Asset).options(joinedload(Asset.catalog)).where(Asset.id == asset_id)
            asset = (await db.execute(stmt)).scalar_one_or_none()
            if not asset or not asset.vin:
                return False
            if asset.catalog is None:
                logger.warning("Asset %s has no catalog entry; audit skipped.", asset_id)
                return False
            make, vin, current_kw = asset.catalog.make, asset.vin, asset.catalog.power_kw

        # 2. AI phase (isolated call, no session open).
        try:
            logger.info(f"🛡️ VIN Audit indul: {vin}")
            truth = await AIService.get_clean_vehicle_data(make, vin, "vin_audit", {"vin": vin})
            if not (truth and truth.get("kw")):
                return False
            real_kw = int(truth["kw"])

            # 3. Save phase (fresh session).
            async with SessionLocal() as db:
                if cls._needs_new_variant(real_kw, current_kw):
                    # Store a new variant and point the asset at it.
                    new_v = AssetCatalog(make=make.upper(), model=truth.get("marketing_name", "Unknown"), power_kw=real_kw)
                    db.add(new_v)
                    await db.flush()
                    await db.execute(update(Asset).where(Asset.id == asset_id).values(catalog_id=new_v.id, is_verified=True))
                else:
                    await db.execute(update(Asset).where(Asset.id == asset_id).values(is_verified=True))
                await db.commit()
            logger.info(f"✅ Audit sikeres: {vin}")
            return True
        except Exception as e:
            logger.error(f"🚨 Auditor hiba: {e}")
            return False

    async def run(self):
        """Main loop: audit unverified assets that have a VIN, one at a time.

        Assets that could not be verified are skipped until the queue has been
        drained once; otherwise the same asset would be re-selected at once
        and the AI called in a tight loop.
        """
        logger.info("🛡️ Auditor v1.3.0 ONLINE")
        skipped = set()
        while True:
            try:
                async with SessionLocal() as db:
                    conditions = [Asset.is_verified == False, Asset.vin.isnot(None)]
                    if skipped:
                        conditions.append(Asset.id.notin_(skipped))
                    stmt = select(Asset.id).where(and_(*conditions)).limit(1)
                    aid = (await db.execute(stmt)).scalar_one_or_none()
                if aid is None:
                    # Nothing left; give the skipped assets another chance
                    # after the pause.
                    skipped.clear()
                    await asyncio.sleep(60)
                elif not await self.audit_asset(aid):
                    skipped.add(aid)
            except Exception as e:
                # Exception rather than a bare except, so cancellation and
                # Ctrl+C can still stop the worker.
                logger.error(f"🚨 Auditor hiba: {e}")
                await asyncio.sleep(30)
if __name__ == "__main__":
    # Script entry point: start the auditor loop.
    asyncio.run(VINAuditor().run())