121 lines
5.3 KiB
Python
import asyncio
|
|
import httpx
|
|
import json
|
|
import logging
|
|
from sqlalchemy import text
|
|
from app.database import AsyncSessionLocal
|
|
|
|
# MB 2.0 logging configuration: module-wide logger plus root handler setup.
logger = logging.getLogger("Data-Loader-Master")
logging.basicConfig(level=logging.INFO, format='%(asctime)s [%(levelname)s] %(message)s')
|
|
|
|
class VehicleDataLoader:
    """Download, normalize and persist vehicle make/model reference data.

    Pulls JSON from a set of verified public sources, maps each source's
    structure onto the unified ``vehicle.reference_lookup`` schema and
    upserts the resulting rows in batches.
    """

    # Verified, working sources.
    SOURCES = {
        "car-data-kohut": "https://raw.githubusercontent.com/DanielKohut/car-data/master/car_data.json",
        "car-list-matth": "https://raw.githubusercontent.com/matthlavacka/car-list/master/car-list.json"
    }

    # Both GitHub sources share the list[{"brand": ..., "models": [...]}]
    # layout; they differ only in the provenance tag written into ``specs``.
    _BRAND_MODEL_TAGS = {
        "car-data-kohut": "kohut",
        "car-list-matth": "matthlavacka",
    }

    @staticmethod
    def normalize_name(name: str) -> str:
        """Basic text cleanup: upper-case, strip, and unify common synonyms.

        Returns "" for falsy input (None, empty string).
        """
        if not name:
            return ""
        n = str(name).upper().strip()
        # Unify common brand synonyms onto their canonical names.
        synonyms = {"VW": "VOLKSWAGEN", "MERCEDES": "MERCEDES-BENZ", "ALFA": "ALFA ROMEO"}
        return synonyms.get(n, n)

    async def fetch_json(self, url: str):
        """Safely download *url* with timeout handling.

        Returns the parsed JSON payload, or None on any HTTP or
        transport failure (failures are logged, never raised).
        """
        async with httpx.AsyncClient(timeout=60.0, follow_redirects=True) as client:
            try:
                resp = await client.get(url)
                if resp.status_code == 200:
                    return resp.json()
                logger.error(f"❌ Letöltési hiba ({resp.status_code}): {url}")
            except Exception as e:
                logger.error(f"🚨 Kommunikációs hiba: {url} -> {e}")
        return None

    def map_source_data(self, source_name, raw_data):
        """
        Mapping layer: translate the JSON structure of each source into
        our unified vehicle.reference_lookup schema.

        Returns a list of row dicts with keys make/model/year/specs/
        source/source_id. Mapping errors are logged and yield a partial
        (possibly empty) list rather than raising.
        """
        unified_entries = []

        try:
            if source_name in self._BRAND_MODEL_TAGS:
                # Structure: list[{"brand": "...", "models": ["...", ...]}]
                # Hoisted: specs is identical for every row of one source.
                specs = json.dumps({"source": self._BRAND_MODEL_TAGS[source_name]})
                for brand_item in raw_data:
                    make = self.normalize_name(brand_item.get("brand"))
                    for model in brand_item.get("models", []):
                        unified_entries.append({
                            "make": make,
                            "model": self.normalize_name(model),
                            "year": None,
                            "specs": specs,
                            "source": source_name,
                            "source_id": None,
                        })

            elif source_name == "os-vehicle-db":
                # Structure: list[{"make": "...", "model": "...", "year": ...}]
                for item in raw_data:
                    unified_entries.append({
                        "make": self.normalize_name(item.get("make")),
                        "model": self.normalize_name(item.get("model")),
                        "year": item.get("year"),
                        "specs": json.dumps(item),
                        "source": source_name,
                        "source_id": str(item.get("id", "")),
                    })
        except Exception as e:
            logger.error(f"⚠️ Mapping hiba a(z) {source_name} feldolgozásakor: {e}")

        return unified_entries

    async def save_to_db(self, entries, batch_size: int = 1000):
        """Batch upsert *entries* into vehicle.reference_lookup.

        Commits after every *batch_size* rows so a failure late in a
        large load does not discard earlier batches. On error the
        current batch is rolled back and the error is logged.
        """
        if not entries:
            return

        async with AsyncSessionLocal() as db:
            stmt = text("""
                INSERT INTO vehicle.reference_lookup (make, model, year, specs, source, source_id)
                VALUES (:make, :model, :year, :specs, :source, :source_id)
                ON CONFLICT ON CONSTRAINT _ref_lookup_uc
                DO UPDATE SET specs = EXCLUDED.specs, updated_at = NOW()
            """)

            try:
                # Send rows in batches (default 1000 per commit).
                for i in range(0, len(entries), batch_size):
                    batch = entries[i:i + batch_size]
                    for row in batch:
                        await db.execute(stmt, row)
                    await db.commit()
                    logger.info(f"💾 Mentve: {min(i + batch_size, len(entries))} / {len(entries)}")
            except Exception as e:
                await db.rollback()
                logger.error(f"🚨 Adatbázis hiba: {e}")

    async def run_sync(self):
        """Fetch, map and persist every configured source sequentially."""
        logger.info("🚀 Data Loader (Fast Lane Support) elindul...")
        for name, url in self.SOURCES.items():
            raw_json = await self.fetch_json(url)
            if raw_json:
                logger.info(f"🧹 {name} feldolgozása...")
                unified = self.map_source_data(name, raw_json)
                await self.save_to_db(unified)
        logger.info("🏁 Minden forrás szinkronizálva.")
|
|
|
|
if __name__ == "__main__":
    # Script entry point: run one full synchronization pass.
    loader = VehicleDataLoader()
    asyncio.run(loader.run_sync())