Cleanup: MB 2.0 Gap Analysis előtti állapot (adatok kizárva)

This commit is contained in:
2026-02-23 09:44:02 +01:00
parent 5757754aae
commit 893f39fa15
74 changed files with 34239 additions and 2834 deletions

View File

@@ -1,208 +1,136 @@
import asyncio
import httpx
import logging
import json
import os
import datetime
from sqlalchemy import text
import sys
from sqlalchemy import text, select
from app.db.session import SessionLocal
from app.models.asset import AssetCatalog
from app.models.vehicle_definitions import VehicleModelDefinition
# NOTE(review): two logging setups follow one another here; this looks like the
# removed and added sides of a diff rendered without +/- markers — confirm
# against the real file which one is live.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger("Robot-v1.0.13-Global-Hunter")
# Logging setup
logging.basicConfig(
level=logging.INFO,
format='%(asctime)s [%(levelname)s] %(name)s: %(message)s'
)
# As written, this second assignment rebinds `logger` to the "Hunter-v2.4-Paginator" name.
logger = logging.getLogger("Hunter-v2.4-Paginator")
# NOTE(review): two class headers follow one another here (CatalogMaster, then
# CatalogHunter) and the attribute lines carry no indentation; this looks like
# both sides of a diff rendered without +/- markers — confirm which class is live.
class CatalogMaster:
"""
Master Hunter Robot v1.0.13 - Global Hunter Edition
- Dutch (RDW), British (DVLA) and American (NHTSA) database integration.
- Ratio filter: screens out kW/kg ratio values such as 0.19.
- Multi-field power discovery: extracts the kW value from every possible field.
- Dynamic model-year handling to prevent duplicates.
"""
# API endpoints
class CatalogHunter:
# RDW open-data resources; the fuel, axle and body resources are queried by `kenteken` (plate).
RDW_MAIN = "https://opendata.rdw.nl/resource/m9d7-ebf2.json"
RDW_FUEL = "https://opendata.rdw.nl/resource/8ys7-d773.json"
RDW_AXLE = "https://opendata.rdw.nl/resource/3huj-srit.json"
RDW_BODY = "https://opendata.rdw.nl/resource/vezc-m2t6.json"
# DVLA vehicle-enquiry endpoint and NHTSA vPIC batch VIN decoder endpoint.
UK_DVLA = "https://driver-vehicle-licensing.api.gov.uk/vehicle-enquiry/v1/vehicles"
US_NHTSA = "https://vpic.nhtsa.dot.gov/api/vehicles/DecodeVinValuesBatch/"
# Credentials are read from the environment; each header dict is empty when
# its variable is unset.
RDW_TOKEN = os.getenv("RDW_APP_TOKEN")
UK_API_KEY = os.getenv("UK_DVLA_API_KEY")
HEADERS_RDW = {"X-App-Token": RDW_TOKEN} if RDW_TOKEN else {}
HEADERS_UK = {"x-api-key": UK_API_KEY, "Content-Type": "application/json"} if UK_API_KEY else {}
# Maps the RDW `voertuigsoort` value to the internal vehicle class; the lookup
# in this file falls back to "other" for values not listed here.
CATEGORY_MAP = {
"Personenauto": "car",
"Motorfiets": "motorcycle",
"Bedrijfsauto": "truck",
"Vrachtwagen": "truck",
"Opleggertrekker": "truck",
"Bus": "bus",
"Aanhangwagen": "trailer",
"Oplegger": "trailer",
"Landbouw- of bosbouwtrekker": "agricultural",
"camper": "camper"
}
# NOTE(review): two method headers are stacked here (`clean_kw`, then
# `get_total_count`), and the `try:` body that follows reads `val`, so it
# matches `clean_kw`; this looks like interleaved diff lines — confirm the real
# layout before editing.
@classmethod
def clean_kw(cls, val):
"""Special kW cleaner: ignores ratio values below 1.0."""
async def get_total_count(cls, client, make_name):
"""Fetch how many records exist in total for the given make."""
# NOTE(review): make_name is interpolated into the SoQL filter unescaped; a
# quote character in the name would break the query — verify inputs are trusted.
query_filter = f"upper(merk) like '%{make_name.upper()}%'"
params = {
"$where": query_filter,
"$select": "count(*)"
}
try:
if val is None: return None
# Accept a decimal comma as well as a decimal point.
f_val = float(str(val).replace(',', '.'))
if 0 < f_val < 1.0: return None # This is only a ratio (kW/kg)
# Truncate to whole kW; zero and negative results are reported as None.
v = int(f_val)
return v if v > 0 else None
except (ValueError, TypeError):
return None
@classmethod
def clean_int(cls, val):
    """Convert a loosely formatted numeric value to an ``int``.

    The value is stringified, a decimal comma is treated like a decimal
    point, and the fractional part is truncated.

    Args:
        val: Raw value (number, numeric string, or ``None``).

    Returns:
        The truncated integer, or ``None`` when ``val`` is ``None`` or cannot
        be represented as an integer (non-numeric text, NaN, infinity).
    """
    if val is None:
        return None
    try:
        return int(float(str(val).replace(',', '.')))
    except (ValueError, TypeError, OverflowError):
        # OverflowError covers infinite floats such as "inf" or "1e999",
        # which float() accepts but int() refuses.
        return None
@classmethod
async def fetch_api(cls, url, params=None, headers=None, method="GET", json_data=None):
    """Call an HTTP API after a fixed delay and return the decoded JSON body.

    A fresh ``httpx.AsyncClient`` is opened for every call. ``method="POST"``
    sends ``json_data`` as the JSON body; any other ``method`` value issues a
    GET with ``params`` as the query string.

    Returns:
        The decoded JSON payload for a 200 or 201 response, otherwise ``[]``.
        Any exception raised while sleeping, requesting or decoding is logged
        and also yields ``[]``.
    """
    async with httpx.AsyncClient(headers=headers, follow_redirects=True) as client:
        try:
            # Safety delay before every request (rate limiting).
            await asyncio.sleep(1.2)
            if method == "POST":
                pending = client.post(url, json=json_data, timeout=30)
            else:
                pending = client.get(url, params=params, timeout=30)
            response = await pending
            if response.status_code not in (200, 201):
                return []
            return response.json()
        except Exception as e:
            logger.error(f"❌ API Hiba ({url}): {e}")
            return []
@classmethod
async def get_deep_tech(cls, plate, main_kw=None, vin=None):
    """Enrich one vehicle with technical data from RDW, DVLA and NHTSA.

    Sources are consulted in order: the RDW fuel dataset, then the DVLA
    vehicle enquiry (only when a UK API key is configured and the Euro class
    is still unknown), then the NHTSA VIN decoder (only when a 17-character
    VIN is given and the power is still unknown). Axle count and body type
    are always looked up in the RDW axle and body datasets.

    Args:
        plate: Registration plate used as the lookup key (``kenteken`` for
            RDW, ``registrationNumber`` for DVLA).
        main_kw: Power candidate already known from the main record; it is
            passed through ``clean_kw`` before use.
        vin: Optional vehicle identification number.

    Returns:
        A dict with the keys ``kw``, ``fuel``, ``axles``, ``body`` and
        ``euro``. Fields that could not be determined keep their defaults
        (``None``, ``"Unknown"`` or ``"Standard"``).
    """
    res = {
        "kw": cls.clean_kw(main_kw),
        "fuel": "Unknown",
        "axles": None,
        "body": "Standard",
        "euro": None,
    }

    # 1. Dutch (RDW) enrichment: power, fuel type and emission level.
    fuel_data = await cls.fetch_api(cls.RDW_FUEL, {"kenteken": plate}, headers=cls.HEADERS_RDW)
    if fuel_data:
        f0 = fuel_data[0]
        if not res["kw"]:
            res["kw"] = cls.clean_kw(f0.get("nettomaximumvermogen") or f0.get("netto_maximum_vermogen"))
        res["fuel"] = f0.get("brandstof_omschrijving", "Unknown")
        res["euro"] = f0.get("uitlaatemissieniveau")

    # 2. British (DVLA) check. The DVLA response carries no power figure:
    # `engineCapacity` is the displacement in cubic centimetres, so it must
    # not be stored as kW. Only the Euro status is taken from it, and the
    # request is skipped when that is already known.
    if cls.UK_API_KEY and not res["euro"]:
        uk_data = await cls.fetch_api(
            cls.UK_DVLA,
            method="POST",
            json_data={"registrationNumber": plate},
            headers=cls.HEADERS_UK,
        )
        if isinstance(uk_data, dict):
            res["euro"] = uk_data.get("euroStatus") or res["euro"]

    # 3. American (NHTSA) lookup, used solely to derive kW from horsepower,
    # so it is skipped when the power is already known.
    # NOTE(review): DecodeVinValuesBatch is documented as a POST endpoint;
    # this GET request may never return data — confirm.
    if vin and len(vin) == 17 and not res["kw"]:
        us_data = await cls.fetch_api(cls.US_NHTSA, params={"format": "json", "data": vin})
        results = us_data.get("Results") if isinstance(us_data, dict) else None
        if results:
            hp = results[0].get("EngineHP")
            try:
                kw_from_hp = int(float(hp) * 0.7457)  # HP -> kW conversion
            except (TypeError, ValueError, OverflowError):
                # Missing, empty or non-numeric horsepower value.
                kw_from_hp = 0
            if kw_from_hp > 0:
                res["kw"] = kw_from_hp

    # RDW extra data (axle count, body type).
    axle = await cls.fetch_api(cls.RDW_AXLE, {"kenteken": plate}, headers=cls.HEADERS_RDW)
    if axle:
        res["axles"] = cls.clean_int(axle[0].get("aantal_assen"))
    body = await cls.fetch_api(cls.RDW_BODY, {"kenteken": plate}, headers=cls.HEADERS_RDW)
    if body:
        res["body"] = body[0].get("carrosserie_omschrijving", "Standard")
    return res
# NOTE(review): these lines use `client` and `params` and read a 'count' field,
# so they look like the body of `get_total_count`, separated from its header by
# unrelated lines — confirm the real layout before editing.
resp = await client.get(cls.RDW_MAIN, params=params, headers=cls.HEADERS_RDW)
if resp.status_code == 200:
data = resp.json()
# The total is read from the 'count' field of the first returned row.
return int(data[0]['count'])
# Any non-200 response is reported as zero records.
return 0
except Exception as e:
# Any failure is logged and likewise reported as zero records.
logger.error(f"⚠️ Nem sikerült a számlálás: {e}")
return 0
# NOTE(review): the body below appears to hold two implementations of
# process_make with their lines interleaved (diff markers stripped?). One
# collects unique variants in memory, enriches each via get_deep_tech and saves
# AssetCatalog rows; the other pages through RDW with a shared httpx client and
# stages VehicleModelDefinition rows. Confirm which lines are live before editing.
@classmethod
async def process_make(cls, db, task_id, make_name):
logger.info(f"🚀 >>> {make_name} GlobalHunter v1.0.13 INDUL...")
offset, limit, total_saved = 0, 1000, 0
# Variant fingerprint -> representative record for that variant.
unique_variants = {}
while True:
params = {"merk": make_name.upper(), "$limit": limit, "$offset": offset}
main_data = await cls.fetch_api(cls.RDW_MAIN, params, headers=cls.HEADERS_RDW)
if not main_data: break
for item in main_data:
plate = item.get("kenteken")
if not plate: continue
model = str(item.get("handelsbenaming", "Unknown")).upper()
ccm = cls.clean_int(item.get("cilinderinhoud"))
weight = cls.clean_int(item.get("massa_ledig_voertuig") or item.get("massa_rijklaar"))
kw_candidate = item.get("netto_maximum_vermogen") or item.get("vermogen_massarijklaar")
raw_date = item.get("datum_eerste_toelating")
# First four characters of the date are taken as the year; 2024 is used when the date is missing.
prod_year = int(str(raw_date)[:4]) if raw_date else 2024
v_class = cls.CATEGORY_MAP.get(item.get("voertuigsoort"), "other")
if "kampeerwagen" in str(item.get("inrichting", "")).lower(): v_class = "camper"
# Variant key: model + CCM + weight + kW + year = unique technical fingerprint
variant_key = f"{model}-{ccm}-{weight}-{v_class}-{kw_candidate}-{prod_year}"
if variant_key not in unique_variants:
unique_variants[variant_key] = {
"model": model, "ccm": ccm, "weight": weight, "v_class": v_class,
"plate": plate, "main_kw": kw_candidate, "prod_year": prod_year,
"vin": item.get("vin") # If the RDW record contains the VIN
}
# Stop on a short (last) page, or once the offset has passed 90000.
if len(main_data) < limit or offset > 90000: break
offset += limit
logger.info(f"📊 {len(unique_variants)} egyedi variáns kutatása indul...")
for key, v in unique_variants.items():
deep = await cls.get_deep_tech(v["plate"], main_kw=v["main_kw"], vin=v["vin"])
try:
db_item = AssetCatalog(
make=make_name.upper(), model=v["model"], vehicle_class=v["v_class"],
fuel_type=deep["fuel"], power_kw=deep["kw"], engine_capacity=v["ccm"],
max_weight_kg=v["weight"], axle_count=deep["axles"], body_type=deep["body"],
year_from=v["prod_year"], euro_class=deep["euro"],
factory_data={
"source": "GlobalHunter-v1.0.13",
"sample_plate": v["plate"],
"enriched_at": str(datetime.datetime.now())
}
)
db.add(db_item)
# Each variant is committed on its own.
await db.commit()
total_saved += 1
if total_saved % 50 == 0: logger.info(f"{total_saved} variáns elmentve.")
except Exception:
# A failed insert is rolled back and skipped; the error itself is not logged.
await db.rollback()
continue
clean_make = make_name.strip().upper()
await db.execute(text("UPDATE data.catalog_discovery SET status = 'processed' WHERE id = :id"), {"id": task_id})
await db.commit()
logger.info(f"🏁 {make_name} KÉSZ. {total_saved} rekord rögzítve.")
async with httpx.AsyncClient(timeout=60) as client:
# STEP 1: count all records
total_available = await cls.get_total_count(client, clean_make)
logger.info(f"🚀 >>> {clean_make} feltérképezése: {total_available} variáns található az RDW-ben.")
if total_available == 0:
# Nothing to page through: the task is still marked processed before returning.
logger.warning(f"⚠️ {clean_make} márkához nem érkezett adat az API-tól.")
await db.execute(text("UPDATE data.catalog_discovery SET status = 'processed' WHERE id = :id"), {"id": task_id})
await db.commit()
return
# STEP 2: pagination
limit = 1000
offset = 0
total_added = 0
while offset < total_available:
logger.info(f"📑 Lapozás: {clean_make} | {offset} -> {offset + limit} (Összesen: {total_available})")
query_filter = f"upper(merk) like '%{clean_make}%'"
params = {
"$where": query_filter,
"$limit": limit,
"$offset": offset,
"$order": ":id" # Recommended for stable Socrata paging
}
resp = await client.get(cls.RDW_MAIN, params=params, headers=cls.HEADERS_RDW)
if resp.status_code != 200:
# Paging stops at the first failed page; the code after the loop still runs.
logger.error(f"❌ Hiba a lapozásnál ({offset}): {resp.status_code}")
break
batch = resp.json()
if not batch: break
# Processing
for item in batch:
res_make = str(item.get("merk", clean_make)).upper()
model = str(item.get("handelsbenaming", "Unknown")).upper()
# Missing or empty values become 0.
ccm = int(float(item.get("cilinderinhoud") or 0))
kw = int(float(item.get("netto_maximum_vermogen") or 0))
# Deduplication check
# NOTE(review): this compares power_kw with 0 when the power is unknown,
# but such rows are stored with power_kw=None below, so they may never
# match and could be inserted again on a later run — verify.
stmt = select(VehicleModelDefinition.id).where(
VehicleModelDefinition.make == res_make,
VehicleModelDefinition.marketing_name == model,
VehicleModelDefinition.engine_capacity == ccm,
VehicleModelDefinition.power_kw == kw
).limit(1)
exists = (await db.execute(stmt)).scalar_one_or_none()
if not exists:
db.add(VehicleModelDefinition(
make=res_make,
technical_code=item.get("kenteken"),
marketing_name=model,
engine_capacity=ccm,
power_kw=kw if kw > 0 else None,
status="unverified",
source="HUNTER-v2.4-PAGINATED"
))
total_added += 1
await db.commit() # Commit at the end of each page
offset += limit
# STEP 3: finish
await db.execute(text("UPDATE data.catalog_discovery SET status = 'processed' WHERE id = :id"), {"id": task_id})
await db.commit()
logger.info(f"{clean_make} KÉSZ. {total_available} rekord átnézve, {total_added} új variáns stagingbe mentve.")
# Worker loop: repeatedly picks one pending row from data.catalog_discovery and
# hands it to process_make.
# NOTE(review): duplicated statements below (two start-up log lines, two task
# queries, several sleeps) look like interleaved diff lines — confirm which are live.
@classmethod
async def run(cls):
logger.info("🤖 Robot 1.0.13 (Global Hunter) ONLINE")
logger.info("🤖 Robot 1 (Hunter) ONLINE - Paginator v2.4")
while True:
# A new session is opened for every polling round.
async with SessionLocal() as db:
res = await db.execute(text("SELECT id, make FROM data.catalog_discovery WHERE status = 'pending' LIMIT 1"))
# This query orders the five listed makes first, then by id, and uses
# FOR UPDATE SKIP LOCKED when selecting the row.
query = text("""
SELECT id, make FROM data.catalog_discovery
WHERE status = 'pending'
ORDER BY
CASE WHEN make IN ('SUZUKI', 'TOYOTA', 'SKODA', 'VOLKSWAGEN', 'OPEL') THEN 1 ELSE 2 END,
id ASC
LIMIT 1 FOR UPDATE SKIP LOCKED
""")
res = await db.execute(query)
task = res.fetchone()
if task:
# task[0] is the discovery row id, task[1] the make name.
await cls.process_make(db, task[0], task[1])
else:
# Queue is empty: log it and sleep before polling again.
logger.info("😴 Várólista üres. Alvás 60 mp...")
await asyncio.sleep(60)
await asyncio.sleep(1)
await asyncio.sleep(20)
# Script entry point: starts the worker loop.
# NOTE(review): two entry calls are listed (CatalogMaster, then CatalogHunter);
# this looks like both sides of a diff — confirm which class is live.
if __name__ == "__main__":
asyncio.run(CatalogMaster.run())
asyncio.run(CatalogHunter.run())