Files
service-finder/backend/app/workers/vehicle/vehicle_data_loader.py
2026-03-13 10:22:41 +00:00

121 lines
5.3 KiB
Python

import asyncio
import httpx
import json
import logging
from sqlalchemy import text
from app.database import AsyncSessionLocal
# MB 2.0 logging settings: a named logger for this worker process.
# NOTE(review): logging.basicConfig runs at import time and configures the
# root logger — confirm this module is only executed as a script and not
# imported by the main application, where this would override app logging.
logger = logging.getLogger("Data-Loader-Master")
logging.basicConfig(level=logging.INFO, format='%(asctime)s [%(levelname)s] %(message)s')
class VehicleDataLoader:
    """Download open vehicle make/model datasets and upsert them into the
    ``vehicle.reference_lookup`` table.

    Each configured source returns a JSON document; the payload is mapped
    into a unified row shape (make, model, year, specs, source, source_id)
    and persisted in batches via ``ON CONFLICT`` upserts.
    """

    # Verified, working sources (source name -> raw JSON URL).
    SOURCES = {
        "car-data-kohut": "https://raw.githubusercontent.com/DanielKohut/car-data/master/car_data.json",
        "car-list-matth": "https://raw.githubusercontent.com/matthlavacka/car-list/master/car-list.json"
    }

    # Both GitHub sources share the list[{"brand": ..., "models": [...]}]
    # layout; this maps the source name to the tag recorded in `specs`.
    # (Replaces two previously duplicated mapping branches.)
    _BRAND_LIST_TAGS = {
        "car-data-kohut": "kohut",
        "car-list-matth": "matthlavacka",
    }

    @staticmethod
    def normalize_name(name) -> str:
        """Basic text cleanup: uppercase, strip whitespace, unify synonyms.

        Returns "" for falsy input so callers can filter unusable values.
        """
        if not name:
            return ""
        n = str(name).upper().strip()
        # Unify frequent brand synonyms (e.g. "VW" -> "VOLKSWAGEN").
        synonyms = {"VW": "VOLKSWAGEN", "MERCEDES": "MERCEDES-BENZ", "ALFA": "ALFA ROMEO"}
        return synonyms.get(n, n)

    async def fetch_json(self, url: str):
        """Safe download with timeout handling.

        Returns the parsed JSON payload, or None on any HTTP or network
        error — failures are logged, never raised, so one broken source
        does not abort the whole sync run.
        """
        async with httpx.AsyncClient(timeout=60.0, follow_redirects=True) as client:
            try:
                resp = await client.get(url)
                if resp.status_code == 200:
                    return resp.json()
                logger.error(f"❌ Letöltési hiba ({resp.status_code}): {url}")
            except Exception as e:
                logger.error(f"🚨 Kommunikációs hiba: {url} -> {e}")
        return None

    def _map_brand_list(self, source_name: str, raw_data, tag: str):
        """Map the shared list[{"brand": ..., "models": [...]}] layout.

        Skips entries whose brand or model normalizes to "" — previously
        such rows were inserted with empty make/model values.
        """
        entries = []
        for brand_item in raw_data:
            make = self.normalize_name(brand_item.get("brand"))
            if not make:
                continue  # unusable row: no brand name
            for model in brand_item.get("models", []):
                model_name = self.normalize_name(model)
                if not model_name:
                    continue  # unusable row: empty model name
                entries.append({
                    "make": make, "model": model_name,
                    "year": None, "specs": json.dumps({"source": tag}),
                    "source": source_name, "source_id": None
                })
        return entries

    def map_source_data(self, source_name, raw_data):
        """Mapping layer: translate a source's JSON structure into our
        unified ``vehicle.reference_lookup`` schema.

        Returns a (possibly empty) list of row dicts. Mapping errors are
        logged; whatever was mapped before the failure is returned.
        """
        unified_entries = []
        try:
            tag = self._BRAND_LIST_TAGS.get(source_name)
            if tag is not None:
                # Shared handler for both brand/models-shaped sources.
                unified_entries = self._map_brand_list(source_name, raw_data, tag)
            elif source_name == "os-vehicle-db":
                # Structure: list[{"make": "...", "model": "...", "year": ...}]
                for item in raw_data:
                    unified_entries.append({
                        "make": self.normalize_name(item.get("make")),
                        "model": self.normalize_name(item.get("model")),
                        "year": item.get("year"),
                        "specs": json.dumps(item),
                        "source": source_name,
                        "source_id": str(item.get("id", ""))
                    })
        except Exception as e:
            logger.error(f"⚠️ Mapping hiba a(z) {source_name} feldolgozásakor: {e}")
        return unified_entries

    async def save_to_db(self, entries):
        """Batch upsert into the database.

        Rows are executed one at a time on purpose: a single multi-row
        ``ON CONFLICT DO UPDATE`` statement fails in PostgreSQL when two
        rows in the same batch hit the same constraint (which synonym
        normalization can produce). Commits happen per 1000-row chunk.
        """
        if not entries:
            return
        async with AsyncSessionLocal() as db:
            stmt = text("""
            INSERT INTO vehicle.reference_lookup (make, model, year, specs, source, source_id)
            VALUES (:make, :model, :year, :specs, :source, :source_id)
            ON CONFLICT ON CONSTRAINT _ref_lookup_uc
            DO UPDATE SET specs = EXCLUDED.specs, updated_at = NOW()
            """)
            try:
                batch_size = 1000
                # Submit in chunks of 1000, committing after each chunk.
                for i in range(0, len(entries), batch_size):
                    for row in entries[i:i + batch_size]:
                        await db.execute(stmt, row)
                    await db.commit()
                    logger.info(f"💾 Mentve: {min(i + 1000, len(entries))} / {len(entries)}")
            except Exception as e:
                await db.rollback()
                logger.error(f"🚨 Adatbázis hiba: {e}")

    async def run_sync(self):
        """Fetch every configured source, map it, and persist the rows."""
        logger.info("🚀 Data Loader (Fast Lane Support) elindul...")
        for name, url in self.SOURCES.items():
            raw_json = await self.fetch_json(url)
            if raw_json:
                logger.info(f"🧹 {name} feldolgozása...")
                unified = self.map_source_data(name, raw_json)
                await self.save_to_db(unified)
        logger.info("🏁 Minden forrás szinkronizálva.")
if __name__ == "__main__":
    # Script entry point: run one full synchronization pass and exit.
    loader = VehicleDataLoader()
    asyncio.run(loader.run_sync())