STABLE: Final schema sync, optimized gitignore
This commit is contained in:
Binary file not shown.
Binary file not shown.
@@ -1,85 +1,70 @@
|
||||
# /opt/docker/dev/service_finder/backend/app/workers/alchemist_v2_2.py
|
||||
import asyncio
|
||||
import logging
|
||||
from sqlalchemy import select, update, func, and_, case # JAVÍTVA: and_ és case importálva
|
||||
from app.db.session import SessionLocal
|
||||
from sqlalchemy import select, update, func, and_, case
|
||||
from app.db.session import AsyncSessionLocal
|
||||
from app.models.vehicle_definitions import VehicleModelDefinition
|
||||
from app.services.ai_service import AIService
|
||||
|
||||
# Logolás finomhangolása
|
||||
logging.basicConfig(level=logging.INFO, format='%(asctime)s [%(levelname)s] %(name)s: %(message)s')
|
||||
logger = logging.getLogger("Robot-Alchemist-v2.2")
|
||||
|
||||
class AlchemistBot:
|
||||
def __init__(self):
|
||||
self.batch_size = 5 # GPU VRAM kímélése (Ollama párhuzamosítás mellett)
|
||||
self.delay_between_records = 12 # Quadro P4000 hűtési idő/késleltetés
|
||||
self.batch_size = 5
|
||||
self.delay_between_records = 12 # P4000 hűtési ciklus
|
||||
|
||||
async def synthesize_vehicle(self, vehicle_id: int):
|
||||
"""AI dúsítás végrehajtása a begyűjtött kontextusból."""
|
||||
async with SessionLocal() as db:
|
||||
""" AI dúsítás végrehajtása az MDM logikája szerint. """
|
||||
async with AsyncSessionLocal() as db:
|
||||
res = await db.execute(select(VehicleModelDefinition).where(VehicleModelDefinition.id == vehicle_id))
|
||||
v = res.scalar_one_or_none()
|
||||
|
||||
if not v or not v.raw_search_context:
|
||||
logger.warning(f"⚠️ Nincs kontextus az ID:{vehicle_id} rekordhoz, átugrás.")
|
||||
logger.warning(f"⚠️ Nincs feldolgozható kontextus ID:{vehicle_id}")
|
||||
return
|
||||
|
||||
make, model = v.make, v.marketing_name
|
||||
logger.info(f"🧪 Arany dúsítás indul (AI Synthesis): {make} {model}")
|
||||
logger.info(f"🧪 Alkimista munka indul: {make} {model}")
|
||||
|
||||
# Státusz zárolása a feldolgozás idejére
|
||||
await db.execute(
|
||||
update(VehicleModelDefinition)
|
||||
.where(VehicleModelDefinition.id == vehicle_id)
|
||||
.values(status='ai_synthesis_in_progress')
|
||||
)
|
||||
# Munkaterület lefoglalása
|
||||
v.status = 'ai_synthesis_in_progress'
|
||||
await db.commit()
|
||||
|
||||
# AI hívás: Gold-Data kinyerése a "szemetesládából"
|
||||
# AI hívás (Kívül a DB tranzakción a timeout elkerülésére)
|
||||
gold_data = await AIService.get_gold_data_from_research(make, model, v.raw_search_context)
|
||||
|
||||
async with SessionLocal() as db:
|
||||
async with AsyncSessionLocal() as db:
|
||||
if gold_data:
|
||||
# Értékek kinyerése és normalizálása
|
||||
ccm = gold_data.get("ccm")
|
||||
kw = gold_data.get("kw")
|
||||
m_name = gold_data.get("marketing_name", model)[:50]
|
||||
t_code = gold_data.get("technical_code")
|
||||
|
||||
# MDM Arany adatok rögzítése
|
||||
await db.execute(
|
||||
update(VehicleModelDefinition)
|
||||
.where(VehicleModelDefinition.id == vehicle_id)
|
||||
.values(
|
||||
marketing_name=m_name,
|
||||
technical_code=t_code or v.technical_code,
|
||||
engine_capacity=ccm,
|
||||
power_kw=kw,
|
||||
features_json=gold_data, # A teljes technikai JSON (olaj, gumi, stb.)
|
||||
marketing_name=gold_data.get("marketing_name", model)[:50],
|
||||
technical_code=gold_data.get("technical_code") or v.technical_code,
|
||||
engine_capacity=gold_data.get("ccm"),
|
||||
power_kw=gold_data.get("kw"),
|
||||
specifications=gold_data, # Teljes specifikáció JSONB
|
||||
status='gold_enriched',
|
||||
updated_at=func.now()
|
||||
)
|
||||
)
|
||||
logger.info(f"✨ GOLD ENRICHED: {make} {m_name} ({ccm} ccm, {kw} kW)")
|
||||
logger.info(f"✨ GOLD DATA GENERÁLVA: {make} {model}")
|
||||
else:
|
||||
# Hiba esetén visszatesszük a sorba, növelve a kísérletek számát
|
||||
await db.execute(
|
||||
update(VehicleModelDefinition)
|
||||
.where(VehicleModelDefinition.id == vehicle_id)
|
||||
.values(
|
||||
status='awaiting_ai_synthesis',
|
||||
attempts=v.attempts + 1,
|
||||
last_error="AI extraction failed or returned empty"
|
||||
)
|
||||
.values(status='awaiting_ai_synthesis', attempts=v.attempts + 1)
|
||||
)
|
||||
logger.warning(f"⚠️ Sikertelen dúsítás: {make} {model}")
|
||||
logger.warning(f"⚠️ AI hiba, visszatéve a sorba: {make} {model}")
|
||||
|
||||
await db.commit()
|
||||
|
||||
async def run(self):
|
||||
logger.info("🚀 Robot 2.2 (Alchemist) ONLINE - Prioritásos feldolgozás")
|
||||
logger.info("🚀 Robot 2.2 (Alchemist) ONLINE")
|
||||
while True:
|
||||
async with SessionLocal() as db:
|
||||
# --- PRIORITÁSI LOGIKA (Megegyezik a Researcher botéval) ---
|
||||
async with AsyncSessionLocal() as db:
|
||||
# Prioritás: Autók (Suzuki, Toyota...) -> Többi autó -> Motorok -> Egyéb
|
||||
priorities = case(
|
||||
(and_(VehicleModelDefinition.vehicle_type == 'car',
|
||||
VehicleModelDefinition.make.in_(['SUZUKI', 'TOYOTA', 'SKODA', 'VOLKSWAGEN', 'OPEL'])), 1),
|
||||
@@ -89,7 +74,6 @@ class AlchemistBot:
|
||||
else_=4
|
||||
)
|
||||
|
||||
# Lekérdezés prioritás szerint, majd a legrégebben frissített rekordok szerint
|
||||
stmt = select(VehicleModelDefinition.id).where(
|
||||
VehicleModelDefinition.status == 'awaiting_ai_synthesis'
|
||||
).order_by(priorities, VehicleModelDefinition.updated_at.asc()).limit(self.batch_size)
|
||||
@@ -98,13 +82,11 @@ class AlchemistBot:
|
||||
ids = [r[0] for r in res.fetchall()]
|
||||
|
||||
if not ids:
|
||||
# Ha üres a tartály, pihenünk és várunk a porszívóra
|
||||
await asyncio.sleep(20)
|
||||
continue
|
||||
|
||||
for vid in ids:
|
||||
await self.synthesize_vehicle(vid)
|
||||
# Quadro P4000 hűtés és Ollama API tehermentesítés
|
||||
await asyncio.sleep(self.delay_between_records)
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
||||
@@ -1,61 +0,0 @@
|
||||
import asyncio
|
||||
import httpx
|
||||
import logging
|
||||
from sqlalchemy import text
|
||||
from app.db.session import SessionLocal
|
||||
|
||||
logging.basicConfig(level=logging.INFO)
|
||||
logger = logging.getLogger("Smart-Seeder-v1.0.2")
|
||||
|
||||
async def seed_with_priority():
|
||||
# RDW lekérdezés: Márka, Fő kategória és darabszám
|
||||
# Olyan márkákat keresünk, amikből legalább 10 db van
|
||||
URL = "https://opendata.rdw.nl/resource/m9d7-ebf2.json?$select=merk,voertuigsoort,count(*)%20as%20total&$group=merk,voertuigsoort&$having=total%20>=%2010"
|
||||
|
||||
logger.info("📥 Adatok lekérése az RDW-től prioritásos besoroláshoz...")
|
||||
|
||||
async with httpx.AsyncClient(timeout=120) as client:
|
||||
try:
|
||||
resp = await client.get(URL)
|
||||
if resp.status_code != 200:
|
||||
logger.error(f"❌ API hiba: {resp.status_code}")
|
||||
return
|
||||
|
||||
raw_data = resp.json()
|
||||
async with SessionLocal() as db:
|
||||
for entry in raw_data:
|
||||
make = entry.get("merk", "").upper()
|
||||
v_kind = entry.get("voertuigsoort", "")
|
||||
|
||||
# --- PRIORITÁS LOGIKA ---
|
||||
# 1. Személyautó (Personenauto) -> 'pending' (Azonnali feldolgozás)
|
||||
# 2. Motor (Motorfiets) -> 'queued_motor'
|
||||
# 3. Minden más -> 'queued_heavy'
|
||||
|
||||
status = 'queued_heavy'
|
||||
if "Personenauto" in v_kind:
|
||||
status = 'pending'
|
||||
elif "Motorfiets" in v_kind:
|
||||
status = 'queued_motor'
|
||||
|
||||
query = text("""
|
||||
INSERT INTO data.catalog_discovery (make, model, vehicle_class, source, status)
|
||||
VALUES (:make, 'ALL_VARIANTS', :v_class, 'smart_seeder_v2_1', :status)
|
||||
ON CONFLICT (make, model, vehicle_class) DO UPDATE
|
||||
SET status = EXCLUDED.status WHERE data.catalog_discovery.status = 'pending';
|
||||
""")
|
||||
|
||||
await db.execute(query, {
|
||||
"make": make,
|
||||
"v_class": v_kind,
|
||||
"status": status
|
||||
})
|
||||
|
||||
await db.commit()
|
||||
logger.info("✅ A Discovery lista feltöltve és prioritizálva (Autók az élen)!")
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"❌ Hiba: {e}")
|
||||
|
||||
if __name__ == "__main__":
|
||||
asyncio.run(seed_with_priority())
|
||||
@@ -1,35 +0,0 @@
|
||||
# app/workers/catalog_filler.py
|
||||
import asyncio
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
from app.db.session import SessionLocal
|
||||
from app.models.asset import AssetCatalog
|
||||
from sqlalchemy import select
|
||||
|
||||
class CatalogFiller:
|
||||
@staticmethod
|
||||
async def seed_initial_data():
|
||||
"""Alapértelmezett márkák és típusok feltöltése (Példa)."""
|
||||
initial_data = [
|
||||
{"make": "Audi", "model": "A4", "generation": "B8 (2008-2015)", "engine_variant": "2.0 TDI (150 LE)", "fuel_type": "Diesel"},
|
||||
{"make": "BMW", "model": "3 Series", "generation": "F30 (2012-2019)", "engine_variant": "320d (190 LE)", "fuel_type": "Diesel"},
|
||||
{"make": "Volkswagen", "model": "Passat", "generation": "B8 (2014-)", "engine_variant": "2.0 TDI (150 LE)", "fuel_type": "Diesel"}
|
||||
]
|
||||
|
||||
async with SessionLocal() as db:
|
||||
for item in initial_data:
|
||||
# Ellenőrizzük, létezik-e már
|
||||
stmt = select(AssetCatalog).where(
|
||||
AssetCatalog.make == item["make"],
|
||||
AssetCatalog.model == item["model"],
|
||||
AssetCatalog.engine_variant == item["engine_variant"]
|
||||
)
|
||||
exists = (await db.execute(stmt)).scalar_one_or_none()
|
||||
|
||||
if not exists:
|
||||
db.add(AssetCatalog(**item))
|
||||
|
||||
await db.commit()
|
||||
print("Catalog seeding complete.")
|
||||
|
||||
if __name__ == "__main__":
|
||||
asyncio.run(CatalogFiller.seed_initial_data())
|
||||
@@ -1,136 +1,182 @@
|
||||
# /opt/docker/dev/service_finder/backend/app/workers/catalog_robot.py
|
||||
import asyncio
|
||||
import httpx
|
||||
import logging
|
||||
import os
|
||||
import sys
|
||||
import re
|
||||
from sqlalchemy import text, select
|
||||
from app.db.session import SessionLocal
|
||||
from app.database import AsyncSessionLocal
|
||||
from app.models.vehicle_definitions import VehicleModelDefinition
|
||||
|
||||
# Logolás beállítása
|
||||
logging.basicConfig(
|
||||
level=logging.INFO,
|
||||
format='%(asctime)s [%(levelname)s] %(name)s: %(message)s'
|
||||
)
|
||||
logger = logging.getLogger("Hunter-v2.4-Paginator")
|
||||
logging.basicConfig(level=logging.INFO, format='%(asctime)s [%(levelname)s] %(message)s')
|
||||
logger = logging.getLogger("Robot-v1.1.0-Precision")
|
||||
|
||||
class CatalogHunter:
|
||||
RDW_MAIN = "https://opendata.rdw.nl/resource/m9d7-ebf2.json"
|
||||
"""
|
||||
v1.1.0 Precision-Hunter (Multi-Source Edition)
|
||||
- Integrált Motorkód (Engine Code) vadászat a jh96-v4pq táblából.
|
||||
- Teljesítmény (kW) és Euro besorolás a 8ys7-d773 táblából.
|
||||
- Alapadatok (CCM, Cyl) a m9d7-ebf2 főtáblából.
|
||||
"""
|
||||
RDW_MAIN = "https://opendata.rdw.nl/resource/m9d7-ebf2.json" # Főtábla
|
||||
RDW_FUEL = "https://opendata.rdw.nl/resource/8ys7-d773.json" # Üzemanyag/kW
|
||||
RDW_ENGINE = "https://opendata.rdw.nl/resource/jh96-v4pq.json" # Motorkód tábla
|
||||
|
||||
RDW_TOKEN = os.getenv("RDW_APP_TOKEN")
|
||||
HEADERS_RDW = {"X-App-Token": RDW_TOKEN} if RDW_TOKEN else {}
|
||||
HEADERS = {"X-App-Token": RDW_TOKEN} if RDW_TOKEN else {}
|
||||
BATCH_SIZE = 50
|
||||
|
||||
@classmethod
|
||||
async def get_total_count(cls, client, make_name):
|
||||
"""Lekéri, összesen hány rekord létezik az adott márkához."""
|
||||
query_filter = f"upper(merk) like '%{make_name.upper()}%'"
|
||||
params = {
|
||||
"$where": query_filter,
|
||||
"$select": "count(*)"
|
||||
}
|
||||
def normalize(cls, text_val: str) -> str:
|
||||
if not text_val: return ""
|
||||
return re.sub(r'[^a-zA-Z0-9]', '', text_val).lower()
|
||||
|
||||
@classmethod
|
||||
def parse_int(cls, value) -> int:
|
||||
try:
|
||||
resp = await client.get(cls.RDW_MAIN, params=params, headers=cls.HEADERS_RDW)
|
||||
if resp.status_code == 200:
|
||||
data = resp.json()
|
||||
return int(data[0]['count'])
|
||||
if value is None or str(value).strip() == "": return 0
|
||||
return int(float(value))
|
||||
except (ValueError, TypeError):
|
||||
return 0
|
||||
|
||||
@classmethod
|
||||
async def fetch_extra_tech(cls, client, plate):
|
||||
"""
|
||||
Összetett adatgyűjtés: Motorkód + Teljesítmény + Euro besorolás.
|
||||
Két külön API hívást indít párhuzamosan a rendszámhoz.
|
||||
"""
|
||||
params = {"kenteken": plate}
|
||||
results = {"power_kw": 0, "euro_klasse": None, "fuel_desc": "Unknown", "engine_code": None}
|
||||
|
||||
try:
|
||||
# 1. Lekérdezés: Üzemanyag és Teljesítmény (kW)
|
||||
# 2. Lekérdezés: Motorkód
|
||||
resp_fuel, resp_eng = await asyncio.gather(
|
||||
client.get(cls.RDW_FUEL, params=params, headers=cls.HEADERS),
|
||||
client.get(cls.RDW_ENGINE, params=params, headers=cls.HEADERS)
|
||||
)
|
||||
|
||||
# Üzemanyag adatok feldolgozása
|
||||
if resp_fuel.status_code == 200:
|
||||
fuel_rows = resp_fuel.json()
|
||||
max_p = 0
|
||||
f_types = []
|
||||
for row in fuel_rows:
|
||||
p = max(cls.parse_int(row.get("netto_maximum_vermogen")),
|
||||
cls.parse_int(row.get("nominaal_continu_maximum_vermogen")))
|
||||
if p > max_p: max_p = p
|
||||
f = row.get("brandstof_omschrijving")
|
||||
if f and f not in f_types: f_types.append(f)
|
||||
if not results["euro_klasse"]:
|
||||
results["euro_klasse"] = row.get("uitlaatemissieniveau") or row.get("euro_klasse")
|
||||
|
||||
results["power_kw"] = max_p
|
||||
results["fuel_desc"] = ", ".join(f_types) if f_types else "Unknown"
|
||||
|
||||
# Motorkód feldolgozása
|
||||
if resp_eng.status_code == 200:
|
||||
eng_rows = resp_eng.json()
|
||||
if eng_rows:
|
||||
# Az első érvényes motorkódot vesszük ki
|
||||
results["engine_code"] = eng_rows[0].get("motorcode")
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"⚠️ Nem sikerült a számlálás: {e}")
|
||||
return 0
|
||||
logger.error(f"❌ RDW-Extra hiba ({plate}): {e}")
|
||||
|
||||
return results
|
||||
|
||||
@classmethod
|
||||
async def process_make(cls, db, task_id, make_name):
|
||||
clean_make = make_name.strip().upper()
|
||||
logger.info(f"🎯 PRECÍZIÓS KUTATÁS INDUL: {clean_make}")
|
||||
|
||||
async with httpx.AsyncClient(timeout=60) as client:
|
||||
# 1. LÉPÉS: Megszámoljuk az összes rekordot
|
||||
total_available = await cls.get_total_count(client, clean_make)
|
||||
logger.info(f"🚀 >>> {clean_make} feltérképezése: {total_available} variáns található az RDW-ben.")
|
||||
|
||||
if total_available == 0:
|
||||
logger.warning(f"⚠️ {clean_make} márkához nem érkezett adat az API-tól.")
|
||||
await db.execute(text("UPDATE data.catalog_discovery SET status = 'processed' WHERE id = :id"), {"id": task_id})
|
||||
await db.commit()
|
||||
return
|
||||
current_offset = 0
|
||||
async with httpx.AsyncClient(timeout=30.0) as client:
|
||||
while True:
|
||||
params = {"merk": clean_make, "$limit": cls.BATCH_SIZE, "$offset": current_offset, "$order": "kenteken DESC"}
|
||||
try:
|
||||
r = await client.get(cls.RDW_MAIN, params=params, headers=cls.HEADERS)
|
||||
if r.status_code != 200: break
|
||||
batch = r.json()
|
||||
except Exception: break
|
||||
|
||||
if not batch:
|
||||
await db.execute(text("UPDATE data.catalog_discovery SET status = 'processed' WHERE id = :id"), {"id": task_id})
|
||||
await db.commit()
|
||||
logger.info(f"🏁 {clean_make} TELJESEN KÉSZ.")
|
||||
return
|
||||
|
||||
# 2. LÉPÉS: Lapozás (Pagination)
|
||||
limit = 1000
|
||||
offset = 0
|
||||
total_added = 0
|
||||
|
||||
while offset < total_available:
|
||||
logger.info(f"📑 Lapozás: {clean_make} | {offset} -> {offset + limit} (Összesen: {total_available})")
|
||||
|
||||
query_filter = f"upper(merk) like '%{clean_make}%'"
|
||||
params = {
|
||||
"$where": query_filter,
|
||||
"$limit": limit,
|
||||
"$offset": offset,
|
||||
"$order": ":id" # Socrata stabil lapozáshoz javasolt
|
||||
}
|
||||
|
||||
resp = await client.get(cls.RDW_MAIN, params=params, headers=cls.HEADERS_RDW)
|
||||
if resp.status_code != 200:
|
||||
logger.error(f"❌ Hiba a lapozásnál ({offset}): {resp.status_code}")
|
||||
break
|
||||
|
||||
batch = resp.json()
|
||||
if not batch: break
|
||||
|
||||
# Feldolgozás
|
||||
for item in batch:
|
||||
res_make = str(item.get("merk", clean_make)).upper()
|
||||
model = str(item.get("handelsbenaming", "Unknown")).upper()
|
||||
ccm = int(float(item.get("cilinderinhoud") or 0))
|
||||
kw = int(float(item.get("netto_maximum_vermogen") or 0))
|
||||
|
||||
# Deduplikáció check
|
||||
stmt = select(VehicleModelDefinition.id).where(
|
||||
VehicleModelDefinition.make == res_make,
|
||||
VehicleModelDefinition.marketing_name == model,
|
||||
VehicleModelDefinition.engine_capacity == ccm,
|
||||
VehicleModelDefinition.power_kw == kw
|
||||
).limit(1)
|
||||
|
||||
exists = (await db.execute(stmt)).scalar_one_or_none()
|
||||
if not exists:
|
||||
db.add(VehicleModelDefinition(
|
||||
make=res_make,
|
||||
technical_code=item.get("kenteken"),
|
||||
marketing_name=model,
|
||||
engine_capacity=ccm,
|
||||
power_kw=kw if kw > 0 else None,
|
||||
status="unverified",
|
||||
source="HUNTER-v2.4-PAGINATED"
|
||||
))
|
||||
total_added += 1
|
||||
|
||||
await db.commit() # Lapvégi mentés
|
||||
offset += limit
|
||||
async with db.begin_nested():
|
||||
try:
|
||||
plate = item.get("kenteken")
|
||||
if not plate: continue
|
||||
|
||||
# 3. LÉPÉS: Befejezés
|
||||
await db.execute(text("UPDATE data.catalog_discovery SET status = 'processed' WHERE id = :id"), {"id": task_id})
|
||||
await db.commit()
|
||||
logger.info(f"✅ {clean_make} KÉSZ. {total_available} rekord átnézve, {total_added} új variáns stagingbe mentve.")
|
||||
raw_model = str(item.get("handelsbenaming", "Unknown")).upper()
|
||||
model_name = raw_model.replace(clean_make, "").strip() or raw_model
|
||||
norm_name = cls.normalize(model_name)
|
||||
|
||||
# Alapadatok a főtáblából
|
||||
ccm = cls.parse_int(item.get("cilinderinhoud"))
|
||||
cyl = cls.parse_int(item.get("aantal_cilinders"))
|
||||
doors = cls.parse_int(item.get("aantal_deuren"))
|
||||
v_class = item.get("voertuigsoort")
|
||||
b_type = item.get("inrichting")
|
||||
v_code = item.get("variant")
|
||||
ver_code = item.get("uitvoering")
|
||||
|
||||
# Évjárat
|
||||
date_str = item.get("datum_eerste_toelating", "0000")
|
||||
year = int(str(date_str)[:4]) if len(str(date_str)) >= 4 else 0
|
||||
|
||||
# Párhuzamos technikai dúsítás (Motorkód + kW + Euro)
|
||||
tech = await cls.fetch_extra_tech(client, plate)
|
||||
|
||||
# Mentés vagy Frissítés
|
||||
stmt = select(VehicleModelDefinition).where(
|
||||
VehicleModelDefinition.make == clean_make,
|
||||
VehicleModelDefinition.normalized_name == norm_name,
|
||||
VehicleModelDefinition.variant_code == v_code,
|
||||
VehicleModelDefinition.version_code == ver_code,
|
||||
VehicleModelDefinition.fuel_type == tech["fuel_desc"]
|
||||
).limit(1)
|
||||
|
||||
existing = (await db.execute(stmt)).scalar_one_or_none()
|
||||
|
||||
if existing:
|
||||
# Frissítés: Ha korábban nem volt meg a motorkód vagy kW, most pótoljuk
|
||||
if tech["engine_code"]: existing.engine_code = tech["engine_code"]
|
||||
if tech["power_kw"] > 0: existing.power_kw = tech["power_kw"]
|
||||
if tech["euro_klasse"]: existing.euro_classification = tech["euro_klasse"]
|
||||
else:
|
||||
db.add(VehicleModelDefinition(
|
||||
make=clean_make, marketing_name=model_name, normalized_name=norm_name,
|
||||
marketing_name_aliases=[raw_model], technical_code=plate,
|
||||
variant_code=v_code, version_code=ver_code, vehicle_class=v_class,
|
||||
body_type=b_type, fuel_type=tech["fuel_desc"], engine_capacity=ccm,
|
||||
engine_code=tech["engine_code"], # ÚJ MEZŐ!
|
||||
power_kw=tech["power_kw"], cylinders=cyl, doors=doors,
|
||||
euro_classification=tech["euro_klasse"],
|
||||
year_from=year if year > 0 else None, year_to=year if year > 0 else None,
|
||||
source="PRECISION-HUNTER-v1.1.0"
|
||||
))
|
||||
except Exception as e:
|
||||
logger.warning(f"⚠️ Hiba ({plate}): {e}")
|
||||
|
||||
await db.commit()
|
||||
current_offset += len(batch)
|
||||
logger.info(f"📈 {clean_make}: {current_offset} rendszám feldolgozva (Engine codes + kW OK)")
|
||||
await asyncio.sleep(0.2)
|
||||
|
||||
@classmethod
|
||||
async def run(cls):
|
||||
logger.info("🤖 Robot 1 (Hunter) ONLINE - Paginator v2.4")
|
||||
logger.info("🤖 Robot v1.1.0 PRECISION-HUNTER ONLINE")
|
||||
while True:
|
||||
async with SessionLocal() as db:
|
||||
query = text("""
|
||||
SELECT id, make FROM data.catalog_discovery
|
||||
WHERE status = 'pending'
|
||||
ORDER BY
|
||||
CASE WHEN make IN ('SUZUKI', 'TOYOTA', 'SKODA', 'VOLKSWAGEN', 'OPEL') THEN 1 ELSE 2 END,
|
||||
id ASC
|
||||
LIMIT 1 FOR UPDATE SKIP LOCKED
|
||||
""")
|
||||
res = await db.execute(query)
|
||||
task = res.fetchone()
|
||||
if task:
|
||||
await cls.process_make(db, task[0], task[1])
|
||||
else:
|
||||
await asyncio.sleep(20)
|
||||
async with AsyncSessionLocal() as db:
|
||||
query = text("SELECT id, make FROM data.catalog_discovery WHERE status IN ('pending', 'processing') ORDER BY priority_score DESC LIMIT 1")
|
||||
task = (await db.execute(query)).fetchone()
|
||||
if task: await cls.process_make(db, task[0], task[1])
|
||||
else: await asyncio.sleep(60)
|
||||
|
||||
if __name__ == "__main__":
|
||||
asyncio.run(CatalogHunter.run())
|
||||
@@ -1,270 +0,0 @@
|
||||
import asyncio
|
||||
import httpx
|
||||
import logging
|
||||
import json
|
||||
import os
|
||||
import datetime
|
||||
import sys
|
||||
from sqlalchemy import text
|
||||
from app.db.session import SessionLocal
|
||||
from app.models.asset import AssetCatalog
|
||||
|
||||
# --- KÉNYSZERÍTETT IDŐBÉLYEGES LOGOLÁS ---
|
||||
# Töröljük az esetleges korábbi konfigurációkat, hogy az időbélyeg garantált legyen
|
||||
for handler in logging.root.handlers[:]:
|
||||
logging.root.removeHandler(handler)
|
||||
|
||||
logging.basicConfig(
|
||||
level=logging.INFO,
|
||||
format='%(asctime)s.%(msecs)03d [%(levelname)s] %(name)s: %(message)s',
|
||||
datefmt='%Y-%m-%d %H:%M:%S',
|
||||
stream=sys.stdout
|
||||
)
|
||||
logger = logging.getLogger("Robot-v1.4.1-Powerhouse")
|
||||
|
||||
class CatalogMaster:
|
||||
"""
|
||||
Master Hunter Robot v1.4.1 - Powerhouse Edition
|
||||
- Párhuzamos Holland (RDW) és Amerikai (NHTSA Batch) Discovery.
|
||||
- Garantált időbélyeges naplózás.
|
||||
- Multi-Worker Safe (FOR UPDATE SKIP LOCKED).
|
||||
- Rate Limit (429) védelem.
|
||||
"""
|
||||
|
||||
# API Végpontok
|
||||
RDW_MAIN = "https://opendata.rdw.nl/resource/m9d7-ebf2.json"
|
||||
RDW_FUEL = "https://opendata.rdw.nl/resource/8ys7-d773.json"
|
||||
RDW_AXLE = "https://opendata.rdw.nl/resource/3huj-srit.json"
|
||||
RDW_BODY = "https://opendata.rdw.nl/resource/vezc-m2t6.json"
|
||||
US_BATCH = "https://vpic.nhtsa.dot.gov/api/vehicles/GetModelsForMakeYear/make/{make}/modelyear/{year}?format=json"
|
||||
|
||||
# BRIT API (Token után aktiválható)
|
||||
UK_DVLA = "https://driver-vehicle-licensing.api.gov.uk/vehicle-enquiry/v1/vehicles"
|
||||
|
||||
RDW_TOKEN = os.getenv("RDW_APP_TOKEN")
|
||||
UK_API_KEY = os.getenv("UK_DVLA_API_KEY")
|
||||
|
||||
HEADERS_RDW = {"X-App-Token": RDW_TOKEN} if RDW_TOKEN else {}
|
||||
HEADERS_UK = {"x-api-key": UK_API_KEY, "Content-Type": "application/json"} if UK_API_KEY else {}
|
||||
|
||||
CATEGORY_MAP = {
|
||||
"Personenauto": "car",
|
||||
"Motorfiets": "motorcycle",
|
||||
"Bedrijfsauto": "truck",
|
||||
"Vrachtwagen": "truck",
|
||||
"Opleggertrekker": "truck",
|
||||
"Bus": "bus",
|
||||
"Aanhangwagen": "trailer",
|
||||
"Oplegger": "trailer",
|
||||
"Landbouw- of bosbouwtrekker": "agricultural",
|
||||
"camper": "camper"
|
||||
}
|
||||
|
||||
# Szabályozzuk a párhuzamos dúsítást (egyszerre max 5 kérés robotpéldányonként)
|
||||
semaphore = asyncio.Semaphore(5)
|
||||
|
||||
@classmethod
|
||||
def clean_kw(cls, val):
|
||||
try:
|
||||
if val is None: return None
|
||||
f_val = float(str(val).replace(',', '.'))
|
||||
if 0 < f_val < 1.0: return None
|
||||
v = int(f_val)
|
||||
return v if v > 0 else None
|
||||
except (ValueError, TypeError):
|
||||
return None
|
||||
|
||||
@classmethod
|
||||
def clean_int(cls, val):
|
||||
try:
|
||||
if val is None: return None
|
||||
return int(float(str(val).replace(',', '.')))
|
||||
except (ValueError, TypeError):
|
||||
return None
|
||||
|
||||
@classmethod
|
||||
async def fetch_api(cls, url, params=None, headers=None, method="GET", json_data=None):
|
||||
"""Intelligens API hívó 429-es védelemmel és időzített logolással."""
|
||||
async with httpx.AsyncClient(headers=headers, follow_redirects=True) as client:
|
||||
for attempt in range(3):
|
||||
try:
|
||||
if method == "POST":
|
||||
resp = await client.post(url, json=json_data, timeout=30)
|
||||
else:
|
||||
resp = await client.get(url, params=params, timeout=30)
|
||||
|
||||
if resp.status_code == 429:
|
||||
wait_time = (attempt + 1) * 5
|
||||
logger.warning(f"⚠️ RATE LIMIT! Várakozás {wait_time}mp: {url}")
|
||||
await asyncio.sleep(wait_time)
|
||||
continue
|
||||
|
||||
return resp.json() if resp.status_code in [200, 201] else []
|
||||
except Exception as e:
|
||||
logger.error(f"❌ API Hiba ({url}): {e}")
|
||||
await asyncio.sleep(2)
|
||||
return []
|
||||
|
||||
@classmethod
|
||||
async def get_deep_tech(cls, plate, main_kw=None, vin=None):
|
||||
"""Mély dúsítás több forrásból párhuzamosan."""
|
||||
async with cls.semaphore:
|
||||
res = {"kw": cls.clean_kw(main_kw), "fuel": "Unknown", "axles": None, "body": "Standard", "euro": None}
|
||||
|
||||
# --- 1. HOLLAND (RDW) DÚSÍTÁS ---
|
||||
fuel_task = cls.fetch_api(cls.RDW_FUEL, {"kenteken": plate}, headers=cls.HEADERS_RDW)
|
||||
axle_task = cls.fetch_api(cls.RDW_AXLE, {"kenteken": plate}, headers=cls.HEADERS_RDW)
|
||||
|
||||
fuel_data, axle_data = await asyncio.gather(fuel_task, axle_task)
|
||||
|
||||
if fuel_data:
|
||||
f0 = fuel_data[0]
|
||||
if not res["kw"]:
|
||||
res["kw"] = cls.clean_kw(f0.get("nettomaximumvermogen") or f0.get("netto_maximum_vermogen"))
|
||||
res["fuel"] = f0.get("brandstof_omschrijving", "Unknown")
|
||||
res["euro"] = f0.get("uitlaatemissieniveau")
|
||||
|
||||
if axle_data:
|
||||
res["axles"] = cls.clean_int(axle_data[0].get("aantal_assen"))
|
||||
|
||||
# --- 2. BRIT (DVLA) ELLENŐRZÉS (AKTIVÁLHATÓ KULCCSAL) ---
|
||||
"""
|
||||
if cls.UK_API_KEY and (not res["kw"] or not res["euro"]):
|
||||
uk_data = await cls.fetch_api(cls.UK_DVLA, method="POST",
|
||||
json_data={"registrationNumber": plate},
|
||||
headers=cls.HEADERS_UK)
|
||||
if uk_data and not isinstance(uk_data, list):
|
||||
res["kw"] = res["kw"] or cls.clean_kw(uk_data.get("engineCapacity"))
|
||||
res["euro"] = res["euro"] or uk_data.get("euroStatus")
|
||||
"""
|
||||
return res
|
||||
|
||||
@classmethod
|
||||
async def discover_holland(cls, make_name, limit=1000):
|
||||
"""Holland Discovery ág: rendszámok gyűjtése."""
|
||||
offset, variants = 0, {}
|
||||
while True:
|
||||
params = {"merk": make_name.upper(), "$limit": limit, "$offset": offset}
|
||||
data = await cls.fetch_api(cls.RDW_MAIN, params, headers=cls.HEADERS_RDW)
|
||||
if not data: break
|
||||
|
||||
for item in data:
|
||||
plate = item.get("kenteken")
|
||||
if not plate: continue
|
||||
model = str(item.get("handelsbenaming", "Unknown")).upper()
|
||||
ccm = cls.clean_int(item.get("cilinderinhoud"))
|
||||
weight = cls.clean_int(item.get("massa_ledig_voertuig") or item.get("massa_rijklaar"))
|
||||
kw = item.get("netto_maximum_vermogen") or item.get("vermogen_massarijklaar")
|
||||
raw_date = item.get("datum_eerste_toelating")
|
||||
year = int(str(raw_date)[:4]) if raw_date else 2024
|
||||
|
||||
v_class = cls.CATEGORY_MAP.get(item.get("voertuigsoort"), "other")
|
||||
key = f"{model}-{ccm}-{weight}-{v_class}-{kw}-{year}"
|
||||
|
||||
if key not in variants:
|
||||
variants[key] = {
|
||||
"model": model, "ccm": ccm, "weight": weight, "v_class": v_class,
|
||||
"plate": plate, "main_kw": kw, "prod_year": year, "vin": item.get("vin")
|
||||
}
|
||||
if len(data) < limit: break
|
||||
offset += limit
|
||||
return variants
|
||||
|
||||
@classmethod
|
||||
async def discover_usa_batch(cls, make_name):
|
||||
"""Amerikai NHTSA Batch Discovery: Típusok gyűjtése."""
|
||||
variants = {}
|
||||
years = range(datetime.datetime.now().year - 5, datetime.datetime.now().year + 1)
|
||||
|
||||
async def fetch_year(year):
|
||||
url = cls.US_BATCH.format(make=make_name.upper(), year=year)
|
||||
logger.info(f"🇺🇸 USA Batch Discovery indítása: {make_name} ({year})")
|
||||
data = await cls.fetch_api(url)
|
||||
if data and "Results" in data:
|
||||
for m in data["Results"]:
|
||||
m_name = m.get("Model_Name", "Unknown").upper()
|
||||
key = f"US-{m_name}-{year}"
|
||||
if key not in variants:
|
||||
variants[key] = {
|
||||
"model": m_name, "ccm": None, "weight": None, "v_class": "car",
|
||||
"plate": "US-DISCOVERY", "main_kw": None, "prod_year": year, "vin": None
|
||||
}
|
||||
|
||||
await asyncio.gather(*(fetch_year(y) for y in years))
|
||||
return variants
|
||||
|
||||
@classmethod
|
||||
async def process_make(cls, db, task_id, make_name):
|
||||
logger.info(f"🚀 >>> {make_name} Powerhouse v1.4.1 INDUL...")
|
||||
|
||||
# Párhuzamos Discovery
|
||||
holland_task = cls.discover_holland(make_name)
|
||||
usa_task = cls.discover_usa_batch(make_name)
|
||||
|
||||
holland_variants, usa_variants = await asyncio.gather(holland_task, usa_task)
|
||||
all_variants = {**usa_variants, **holland_variants}
|
||||
|
||||
logger.info(f"📊 Összefésült variánsok száma: {len(all_variants)}")
|
||||
|
||||
async def enrich_and_save(v):
|
||||
deep = await cls.get_deep_tech(v["plate"], main_kw=v["main_kw"], vin=v["vin"])
|
||||
try:
|
||||
db_item = AssetCatalog(
|
||||
make=make_name.upper(), model=v["model"], vehicle_class=v["v_class"],
|
||||
fuel_type=deep["fuel"], power_kw=deep["kw"], engine_capacity=v["ccm"],
|
||||
max_weight_kg=v["weight"], axle_count=deep["axles"], body_type=deep["body"],
|
||||
year_from=v["prod_year"], euro_class=deep["euro"],
|
||||
factory_data={
|
||||
"source": "Powerhouse-v1.4.1",
|
||||
"discovery_nl": v["plate"] != "US-DISCOVERY",
|
||||
"enriched_at": str(datetime.datetime.now())
|
||||
}
|
||||
)
|
||||
return db_item
|
||||
except Exception:
|
||||
return None
|
||||
|
||||
# Párhuzamos dúsítás (Semaphore korláttal)
|
||||
results = await asyncio.gather(*(enrich_and_save(v) for v in all_variants.values()))
|
||||
|
||||
total_saved = 0
|
||||
for item in results:
|
||||
if item:
|
||||
db.add(item)
|
||||
total_saved += 1
|
||||
|
||||
await db.commit()
|
||||
await db.execute(text("UPDATE data.catalog_discovery SET status = 'processed' WHERE id = :id"), {"id": task_id})
|
||||
await db.commit()
|
||||
logger.info(f"🏁 {make_name} KÉSZ. {total_saved} egyedi rekord rögzítve.")
|
||||
|
||||
@classmethod
|
||||
async def run(cls):
|
||||
logger.info("🤖 Robot 1.4.1 (Powerhouse) ONLINE - Multi-Worker Safe Mode")
|
||||
while True:
|
||||
async with SessionLocal() as db:
|
||||
# SKIP LOCKED védelem a párhuzamos futtatáshoz
|
||||
query = text("""
|
||||
SELECT id, make FROM data.catalog_discovery
|
||||
WHERE status = 'pending'
|
||||
LIMIT 1
|
||||
FOR UPDATE SKIP LOCKED
|
||||
""")
|
||||
res = await db.execute(query)
|
||||
task = res.fetchone()
|
||||
|
||||
if task:
|
||||
task_id, make_name = task
|
||||
await db.execute(
|
||||
text("UPDATE data.catalog_discovery SET status = 'running' WHERE id = :id"),
|
||||
{"id": task_id}
|
||||
)
|
||||
await db.commit()
|
||||
await cls.process_make(db, task_id, make_name)
|
||||
else:
|
||||
logger.info("😴 Várólista üres vagy minden feladat foglalt. Alvás 60mp...")
|
||||
await asyncio.sleep(60)
|
||||
await asyncio.sleep(1)
|
||||
|
||||
if __name__ == "__main__":
|
||||
asyncio.run(CatalogMaster.run())
|
||||
@@ -1,272 +0,0 @@
|
||||
import asyncio
|
||||
import httpx
|
||||
import logging
|
||||
import json
|
||||
import os
|
||||
import datetime
|
||||
from sqlalchemy import text
|
||||
from app.db.session import SessionLocal
|
||||
from app.models.asset import AssetCatalog
|
||||
|
||||
logging.basicConfig(level=logging.INFO)
|
||||
logger = logging.getLogger("Robot-v1.4-Powerhouse")
|
||||
|
||||
class CatalogMaster:
|
||||
"""
|
||||
Master Hunter Robot v1.4 - Powerhouse Edition
|
||||
- Párhuzamos Holland (RDW) és Amerikai (NHTSA Batch) Discovery.
|
||||
- Előkészített, kikommentelt Brit (DVLA) integráció.
|
||||
- Async Semaphore: Párhuzamos technikai dúsítás (egyszerre 10 szálon).
|
||||
- Intelligens összefésülés a globális források között.
|
||||
"""
|
||||
|
||||
# API Végpontok
|
||||
RDW_MAIN = "https://opendata.rdw.nl/resource/m9d7-ebf2.json"
|
||||
RDW_FUEL = "https://opendata.rdw.nl/resource/8ys7-d773.json"
|
||||
RDW_AXLE = "https://opendata.rdw.nl/resource/3huj-srit.json"
|
||||
RDW_BODY = "https://opendata.rdw.nl/resource/vezc-m2t6.json"
|
||||
|
||||
# AMERIKAI BATCH API: Egyetlen hívással az összes modell évjárat szerint
|
||||
US_BATCH = "https://vpic.nhtsa.dot.gov/api/vehicles/GetModelsForMakeYear/make/{make}/modelyear/{year}?format=json"
|
||||
|
||||
# BRIT API (Kikapcsolva a tokenig)
|
||||
# UK_DVLA = "https://driver-vehicle-licensing.api.gov.uk/vehicle-enquiry/v1/vehicles"
|
||||
|
||||
RDW_TOKEN = os.getenv("RDW_APP_TOKEN")
|
||||
UK_API_KEY = os.getenv("UK_DVLA_API_KEY") # Jövőbeli token helye
|
||||
|
||||
HEADERS_RDW = {"X-App-Token": RDW_TOKEN} if RDW_TOKEN else {}
|
||||
# HEADERS_UK = {"x-api-key": UK_API_KEY, "Content-Type": "application/json"} if UK_API_KEY else {}
|
||||
|
||||
CATEGORY_MAP = {
|
||||
"Personenauto": "car",
|
||||
"Motorfiets": "motorcycle",
|
||||
"Bedrijfsauto": "truck",
|
||||
"Vrachtwagen": "truck",
|
||||
"Opleggertrekker": "truck",
|
||||
"Bus": "bus",
|
||||
"Aanhangwagen": "trailer",
|
||||
"Oplegger": "trailer",
|
||||
"Landbouw- of bosbouwtrekker": "agricultural",
|
||||
"camper": "camper"
|
||||
}
|
||||
|
||||
# Szabályozzuk a párhuzamos dúsítást, hogy ne tiltsanak le (egyszerre max 10 kérés)
|
||||
semaphore = asyncio.Semaphore(5)
|
||||
|
||||
@classmethod
|
||||
def clean_kw(cls, val):
|
||||
try:
|
||||
if val is None: return None
|
||||
f_val = float(str(val).replace(',', '.'))
|
||||
if 0 < f_val < 1.0: return None
|
||||
v = int(f_val)
|
||||
return v if v > 0 else None
|
||||
except (ValueError, TypeError):
|
||||
return None
|
||||
|
||||
@classmethod
|
||||
def clean_int(cls, val):
|
||||
try:
|
||||
if val is None: return None
|
||||
return int(float(str(val).replace(',', '.')))
|
||||
except (ValueError, TypeError):
|
||||
return None
|
||||
|
||||
@classmethod
|
||||
async def fetch_api(cls, url, params=None, headers=None, method="GET", json_data=None):
|
||||
async with httpx.AsyncClient(headers=headers, follow_redirects=True) as client:
|
||||
for attempt in range(3): # 3-szor próbáljuk újra, ha kell
|
||||
try:
|
||||
if method == "POST":
|
||||
resp = await client.post(url, json=json_data, timeout=30)
|
||||
else:
|
||||
resp = await client.get(url, params=params, timeout=30)
|
||||
|
||||
if resp.status_code == 429: # HOPPÁ, túl gyorsak vagyunk!
|
||||
wait_time = (attempt + 1) * 5 # Egyre többet vár: 5s, 10s...
|
||||
logger.warning(f"⚠️ RDW limit elérve! Pihenő {wait_time} mp...")
|
||||
await asyncio.sleep(wait_time)
|
||||
continue
|
||||
|
||||
return resp.json() if resp.status_code in [200, 201] else []
|
||||
except Exception as e:
|
||||
logger.error(f"❌ API Hiba ({url}): {e}")
|
||||
await asyncio.sleep(2)
|
||||
return []
|
||||
|
||||
@classmethod
|
||||
async def get_deep_tech(cls, plate, main_kw=None, vin=None):
|
||||
"""Mély dúsítás párhuzamos forrásokból."""
|
||||
async with cls.semaphore:
|
||||
res = {"kw": cls.clean_kw(main_kw), "fuel": "Unknown", "axles": None, "body": "Standard", "euro": None}
|
||||
|
||||
# --- 1. HOLLAND (RDW) DÚSÍTÁS ---
|
||||
fuel_task = cls.fetch_api(cls.RDW_FUEL, {"kenteken": plate}, headers=cls.HEADERS_RDW)
|
||||
axle_task = cls.fetch_api(cls.RDW_AXLE, {"kenteken": plate}, headers=cls.HEADERS_RDW)
|
||||
|
||||
# Holland adatok párhuzamos lekérése
|
||||
fuel_data, axle_data = await asyncio.gather(fuel_task, axle_task)
|
||||
|
||||
if fuel_data:
|
||||
f0 = fuel_data[0]
|
||||
if not res["kw"]:
|
||||
res["kw"] = cls.clean_kw(f0.get("nettomaximumvermogen") or f0.get("netto_maximum_vermogen"))
|
||||
res["fuel"] = f0.get("brandstof_omschrijving", "Unknown")
|
||||
res["euro"] = f0.get("uitlaatemissieniveau")
|
||||
|
||||
if axle_data:
|
||||
res["axles"] = cls.clean_int(axle_data[0].get("aantal_assen"))
|
||||
|
||||
# --- 2. BRIT (DVLA) ELLENŐRZÉS (KIKOMMENTELVE A TOKENIG) ---
|
||||
"""
|
||||
if cls.UK_API_KEY and (not res["kw"] or not res["euro"]):
|
||||
uk_data = await cls.fetch_api(cls.UK_DVLA, method="POST",
|
||||
json_data={"registrationNumber": plate},
|
||||
headers=cls.HEADERS_UK)
|
||||
if uk_data and not isinstance(uk_data, list):
|
||||
res["kw"] = res["kw"] or cls.clean_kw(uk_data.get("engineCapacity"))
|
||||
res["euro"] = res["euro"] or uk_data.get("euroStatus")
|
||||
"""
|
||||
|
||||
return res
|
||||
|
||||
@classmethod
|
||||
async def discover_holland(cls, make_name, limit=1000):
|
||||
"""Holland Discovery ág."""
|
||||
offset, variants = 0, {}
|
||||
while True:
|
||||
params = {"merk": make_name.upper(), "$limit": limit, "$offset": offset}
|
||||
data = await cls.fetch_api(cls.RDW_MAIN, params, headers=cls.HEADERS_RDW)
|
||||
if not data: break
|
||||
|
||||
for item in data:
|
||||
plate = item.get("kenteken")
|
||||
if not plate: continue
|
||||
model = str(item.get("handelsbenaming", "Unknown")).upper()
|
||||
ccm = cls.clean_int(item.get("cilinderinhoud"))
|
||||
weight = cls.clean_int(item.get("massa_ledig_voertuig") or item.get("massa_rijklaar"))
|
||||
kw = item.get("netto_maximum_vermogen") or item.get("vermogen_massarijklaar")
|
||||
raw_date = item.get("datum_eerste_toelating")
|
||||
year = int(str(raw_date)[:4]) if raw_date else 2024
|
||||
|
||||
v_class = cls.CATEGORY_MAP.get(item.get("voertuigsoort"), "other")
|
||||
key = f"{model}-{ccm}-{weight}-{v_class}-{kw}-{year}"
|
||||
|
||||
if key not in variants:
|
||||
variants[key] = {
|
||||
"model": model, "ccm": ccm, "weight": weight, "v_class": v_class,
|
||||
"plate": plate, "main_kw": kw, "prod_year": year, "vin": item.get("vin")
|
||||
}
|
||||
if len(data) < limit: break
|
||||
offset += limit
|
||||
return variants
|
||||
|
||||
@classmethod
|
||||
async def discover_usa_batch(cls, make_name):
|
||||
"""Amerikai NHTSA Batch Discovery ág (2020-2025 évjáratokra)."""
|
||||
variants = {}
|
||||
# Az utolsó 5 évjáratot nézzük a legfrissebb modellekért
|
||||
years = range(datetime.datetime.now().year - 5, datetime.datetime.now().year + 1)
|
||||
|
||||
async def fetch_year(year):
|
||||
url = cls.US_BATCH.format(make=make_name.upper(), year=year)
|
||||
data = await cls.fetch_api(url)
|
||||
if data and "Results" in data:
|
||||
for m in data["Results"]:
|
||||
m_name = m.get("Model_Name", "Unknown").upper()
|
||||
# US adatoknál nincs rendszám, de a Robot 2 dúsítani fogja ha kell
|
||||
key = f"US-{m_name}-{year}"
|
||||
variants[key] = {
|
||||
"model": m_name, "ccm": None, "weight": None, "v_class": "car",
|
||||
"plate": "US-DISCOVERY", "main_kw": None, "prod_year": year, "vin": None
|
||||
}
|
||||
|
||||
await asyncio.gather(*(fetch_year(y) for y in years))
|
||||
return variants
|
||||
|
||||
@classmethod
|
||||
async def process_make(cls, db, task_id, make_name):
|
||||
logger.info(f"🚀 >>> {make_name} Powerhouse v1.4 INDUL...")
|
||||
|
||||
# PÁRHUZAMOS DISCOVERY: Holland és USA egyszerre
|
||||
holland_task = cls.discover_holland(make_name)
|
||||
usa_task = cls.discover_usa_batch(make_name)
|
||||
|
||||
holland_variants, usa_variants = await asyncio.gather(holland_task, usa_task)
|
||||
|
||||
# Összefésülés (Holland élvez elsőbbséget a rendszám miatt)
|
||||
all_variants = {**usa_variants, **holland_variants}
|
||||
logger.info(f"📊 Összesen {len(all_variants)} egyedi variáns (NL: {len(holland_variants)}, US: {len(usa_variants)})")
|
||||
|
||||
# PÁRHUZAMOS DÚSÍTÁS
|
||||
async def enrich_and_save(v):
|
||||
deep = await cls.get_deep_tech(v["plate"], main_kw=v["main_kw"], vin=v["vin"])
|
||||
try:
|
||||
db_item = AssetCatalog(
|
||||
make=make_name.upper(), model=v["model"], vehicle_class=v["v_class"],
|
||||
fuel_type=deep["fuel"], power_kw=deep["kw"], engine_capacity=v["ccm"],
|
||||
max_weight_kg=v["weight"], axle_count=deep["axles"], body_type=deep["body"],
|
||||
year_from=v["prod_year"], euro_class=deep["euro"],
|
||||
factory_data={
|
||||
"source": "Powerhouse-v1.4",
|
||||
"discovery_nl": v["plate"] != "US-DISCOVERY",
|
||||
"enriched_at": str(datetime.datetime.now())
|
||||
}
|
||||
)
|
||||
return db_item
|
||||
except Exception:
|
||||
return None
|
||||
|
||||
# Egyszerre indítjuk a dúsításokat (A semaphore korlátozza a szálakat)
|
||||
results = await asyncio.gather(*(enrich_and_save(v) for v in all_variants.values()))
|
||||
|
||||
# Mentés
|
||||
total_saved = 0
|
||||
for item in results:
|
||||
if item:
|
||||
db.add(item)
|
||||
total_saved += 1
|
||||
|
||||
await db.commit()
|
||||
await db.execute(text("UPDATE data.catalog_discovery SET status = 'processed' WHERE id = :id"), {"id": task_id})
|
||||
await db.commit()
|
||||
logger.info(f"🏁 {make_name} KÉSZ. {total_saved} rekord rögzítve.")
|
||||
|
||||
@classmethod
|
||||
async def run(cls):
|
||||
logger.info("🤖 Robot 1.4 (Powerhouse) ONLINE - Multi-Worker Safe")
|
||||
while True:
|
||||
async with SessionLocal() as db:
|
||||
# 1. 'FOR UPDATE SKIP LOCKED' - Megfogjuk a sort és lelakatoljuk,
|
||||
# de a többi robot átugorja, amit mi már fogunk.
|
||||
query = text("""
|
||||
SELECT id, make FROM data.catalog_discovery
|
||||
WHERE status = 'pending'
|
||||
LIMIT 1
|
||||
FOR UPDATE SKIP LOCKED
|
||||
""")
|
||||
|
||||
res = await db.execute(query)
|
||||
task = res.fetchone()
|
||||
|
||||
if task:
|
||||
task_id, make_name = task
|
||||
# 2. Azonnal átállítjuk 'running'-ra a tranzakción belül,
|
||||
# így senki más nem nyúl hozzá.
|
||||
await db.execute(
|
||||
text("UPDATE data.catalog_discovery SET status = 'running' WHERE id = :id"),
|
||||
{"id": task_id}
|
||||
)
|
||||
await db.commit() # Itt véglegesítjük a foglalást
|
||||
|
||||
# 3. Indulhat a tényleges munka
|
||||
await cls.process_make(db, task_id, make_name)
|
||||
else:
|
||||
logger.info("😴 Várólista üres (vagy minden sor foglalt). Alvás 60 mp...")
|
||||
await asyncio.sleep(60)
|
||||
|
||||
await asyncio.sleep(1)
|
||||
|
||||
if __name__ == "__main__":
|
||||
asyncio.run(CatalogMaster.run())
|
||||
109
backend/app/workers/discovery_engine.py
Normal file
109
backend/app/workers/discovery_engine.py
Normal file
@@ -0,0 +1,109 @@
|
||||
# /opt/docker/dev/service_finder/backend/app/workers/discovery_engine.py
|
||||
import asyncio
|
||||
import httpx
|
||||
import logging
|
||||
from sqlalchemy import text, select
|
||||
from app.db.session import AsyncSessionLocal
|
||||
from app.models.asset import AssetCatalog
|
||||
from app.models.vehicle_definitions import VehicleModelDefinition
|
||||
|
||||
logging.basicConfig(level=logging.INFO, format='%(asctime)s [%(levelname)s] %(name)s: %(message)s')
|
||||
logger = logging.getLogger("Discovery-Engine-v2.0")
|
||||
|
||||
class DiscoveryEngine:
|
||||
"""
|
||||
A Robot-ökoszisztéma 'etetője'.
|
||||
Kombinálja a külső API felfedezést és a manuális alapozó adatokat.
|
||||
"""
|
||||
|
||||
@staticmethod
|
||||
async def seed_manual_bootstrap():
|
||||
"""
|
||||
1. FÁZIS: Manuális alapozás (Bootstrap).
|
||||
Azonnali, biztos pontok a katalógusban a teszteléshez.
|
||||
"""
|
||||
initial_data = [
|
||||
{"make": "AUDI", "model": "A4", "generation": "B8 (2008-2015)", "vehicle_class": "car"},
|
||||
{"make": "BMW", "model": "3 SERIES", "generation": "F30 (2012-2019)", "vehicle_class": "car"},
|
||||
{"make": "VOLKSWAGEN", "model": "PASSAT", "generation": "B8 (2014-)", "vehicle_class": "car"},
|
||||
{"make": "SUZUKI", "model": "VITARA", "generation": "LY (2015-)", "vehicle_class": "car"}
|
||||
]
|
||||
|
||||
async with AsyncSessionLocal() as db:
|
||||
logger.info("🛠️ Manuális bootstrap indul...")
|
||||
for item in initial_data:
|
||||
stmt = select(AssetCatalog).where(
|
||||
AssetCatalog.make == item["make"],
|
||||
AssetCatalog.model == item["model"]
|
||||
)
|
||||
exists = (await db.execute(stmt)).scalar_one_or_none()
|
||||
|
||||
if not exists:
|
||||
db.add(AssetCatalog(**item))
|
||||
|
||||
await db.commit()
|
||||
logger.info("✅ Manuális bootstrap kész.")
|
||||
|
||||
@staticmethod
|
||||
async def seed_from_rdw():
|
||||
"""
|
||||
2. FÁZIS: Külső prioritásos felfedezés (RDW API).
|
||||
Feltölti a várólistát a Hunter robot számára.
|
||||
"""
|
||||
RDW_URL = (
|
||||
"https://opendata.rdw.nl/resource/m9d7-ebf2.json?"
|
||||
"$select=merk,voertuigsoort,count(*)%20as%20total"
|
||||
"&$group=merk,voertuigsoort"
|
||||
"&$having=total%20>=%2010"
|
||||
)
|
||||
|
||||
logger.info("📥 RDW adatgyűjtés indul a várólistához...")
|
||||
|
||||
async with httpx.AsyncClient(timeout=60) as client:
|
||||
try:
|
||||
resp = await client.get(RDW_URL)
|
||||
if resp.status_code != 200:
|
||||
logger.error(f"❌ RDW API hiba: {resp.status_code}")
|
||||
return
|
||||
|
||||
raw_data = resp.json()
|
||||
async with AsyncSessionLocal() as db:
|
||||
for entry in raw_data:
|
||||
make = str(entry.get("merk", "")).upper().strip()
|
||||
v_kind = entry.get("voertuigsoort", "")
|
||||
|
||||
if not make: continue
|
||||
|
||||
# Prioritás és Kategória meghatározása
|
||||
if "Personenauto" in v_kind:
|
||||
status, v_class = 'pending', 'car'
|
||||
elif "Motorfiets" in v_kind:
|
||||
status, v_class = 'queued_motor', 'motorcycle'
|
||||
else:
|
||||
status, v_class = 'queued_heavy', 'truck'
|
||||
|
||||
# UPSERT (Ütközéskezelés)
|
||||
query = text("""
|
||||
INSERT INTO data.catalog_discovery (make, model, vehicle_class, source, status)
|
||||
VALUES (:make, 'ALL_VARIANTS', :v_class, 'discovery_engine_v2', :status)
|
||||
ON CONFLICT (make, model, vehicle_class) DO NOTHING;
|
||||
""")
|
||||
|
||||
await db.execute(query, {"make": make, "v_class": v_class, "status": status})
|
||||
|
||||
await db.commit()
|
||||
logger.info(f"✅ Discovery lista frissítve ({len(raw_data)} márka).")
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"❌ Hiba az RDW szinkron alatt: {e}")
|
||||
|
||||
@classmethod
|
||||
async def run_full_initialization(cls):
|
||||
""" A teljes rendszerindító folyamat. """
|
||||
logger.info("🚀 Discovery Engine: TELJES INICIALIZÁLÁS")
|
||||
await cls.seed_manual_bootstrap()
|
||||
await cls.seed_from_rdw()
|
||||
logger.info("🏁 Minden alapozó folyamat lefutott.")
|
||||
|
||||
if __name__ == "__main__":
|
||||
asyncio.run(DiscoveryEngine.run_full_initialization())
|
||||
@@ -1,3 +0,0 @@
|
||||
nev,cim,telefon,web,tipus
|
||||
Ideál Autó Dunakeszi,"2120 Dunakeszi, Pallag u. 7",+36201234567,http://idealauto.hu,car_repair
|
||||
IMCMotor Szerviz,"2120 Dunakeszi, Kikerics köz 4",+36703972543,https://www.imcmotor.hu,motorcycle_repair
|
||||
|
@@ -1,66 +1,131 @@
|
||||
# /opt/docker/dev/service_finder/backend/app/workers/ocr_robot.py
|
||||
import asyncio
|
||||
import os
|
||||
import logging
|
||||
from PIL import Image
|
||||
from sqlalchemy import select, update
|
||||
from app.db.session import SessionLocal
|
||||
from app.models.document import Document # Feltételezve
|
||||
from app.db.session import AsyncSessionLocal
|
||||
from app.models.document import Document
|
||||
from app.models.identity import User
|
||||
from app.services.ai_service import AIService
|
||||
from app.core.config import settings
|
||||
|
||||
logging.basicConfig(level=logging.INFO)
|
||||
# Logolás beállítása
|
||||
logging.basicConfig(level=logging.INFO, format='%(asctime)s [%(levelname)s] %(name)s: %(message)s')
|
||||
logger = logging.getLogger("Robot-OCR-V3")
|
||||
|
||||
NAS_BASE_PATH = os.getenv("NAS_STORAGE_PATH", "/mnt/nas/user_vault")
|
||||
|
||||
class OCRRobot:
|
||||
"""
|
||||
Robot 3: Dokumentum elemző és adatkinyerő.
|
||||
Kizárólag a Premium és VIP előfizetők dokumentumait dolgozza fel automatikusan.
|
||||
"""
|
||||
|
||||
@staticmethod
|
||||
def _sync_resize_and_save(source: str, target: str):
|
||||
""" Kép optimalizálása (szinkron végrehajtás a Pillow miatt). """
|
||||
with Image.open(source) as img:
|
||||
# Konvertálás RGB-be (PNG/RGBA -> JPEG támogatás miatt)
|
||||
rgb_img = img.convert('RGB')
|
||||
# Max szélesség 1600px az MB 2.0 Vault szabályai szerint
|
||||
if rgb_img.width > 1600:
|
||||
ratio = 1600 / float(rgb_img.width)
|
||||
new_height = int(float(rgb_img.height) * float(ratio))
|
||||
rgb_img = rgb_img.resize((1600, new_height), Image.Resampling.LANCZOS)
|
||||
|
||||
rgb_img.save(target, "JPEG", quality=85, optimize=True)
|
||||
|
||||
@classmethod
|
||||
async def process_queue(cls):
|
||||
async with SessionLocal() as db:
|
||||
# 1. Csak a várólistás és prémium jogosultságú dokumentumokat keressük
|
||||
stmt = select(Document, User).join(User).where(
|
||||
""" A várólista feldolgozása. """
|
||||
async with AsyncSessionLocal() as db:
|
||||
# 1. LOGIKA: Feladatok lekérése (Pending + Premium jogosultság)
|
||||
# A 'SKIP LOCKED' biztosítja, hogy több robot ne akadjon össze
|
||||
stmt = select(Document, User).join(User, Document.parent_id == User.scope_id).where(
|
||||
Document.status == "pending_ocr",
|
||||
User.subscription_plan.in_(["PREMIUM_PLUS", "VIP_PLUS"])
|
||||
).limit(10)
|
||||
User.subscription_plan.in_(["PREMIUM_PLUS", "VIP_PLUS", "PREMIUM", "VIP"])
|
||||
).limit(5)
|
||||
|
||||
res = await db.execute(stmt)
|
||||
tasks = res.all()
|
||||
|
||||
if not tasks:
|
||||
return
|
||||
|
||||
for doc, user in tasks:
|
||||
try:
|
||||
logger.info(f"📸 OCR feldolgozás: {doc.filename} (User: {user.id})")
|
||||
logger.info(f"📸 OCR megkezdése: {doc.original_name} (Szervezet: {user.scope_id})")
|
||||
|
||||
# 2. AI OCR hívás
|
||||
with open(doc.temp_path, "rb") as f:
|
||||
# Státusz zárolása
|
||||
doc.status = "processing"
|
||||
await db.commit()
|
||||
|
||||
# 2. LOGIKA: AI OCR hívás az AIService-en keresztül
|
||||
# Itt feltételezzük, hogy a Document modellben tároljuk a temp_path-t
|
||||
if not doc.file_hash: # Biztonsági check
|
||||
raise ValueError("Hiányzó fájl hivatkozás.")
|
||||
|
||||
temp_path = f"/app/temp/uploads/{doc.file_hash}"
|
||||
|
||||
if not os.path.exists(temp_path):
|
||||
raise FileNotFoundError(f"A forrásfájl nem található: {temp_path}")
|
||||
|
||||
with open(temp_path, "rb") as f:
|
||||
image_bytes = f.read()
|
||||
|
||||
ocr_result = await AIService.analyze_document_image(image_bytes, doc.doc_type)
|
||||
|
||||
|
||||
# AI felismerés (pl. Llama-Vision vagy GPT-4o)
|
||||
ocr_result = await AIService.get_clean_vehicle_data(
|
||||
make="OCR_SCAN",
|
||||
raw_model=doc.parent_type,
|
||||
v_type="document",
|
||||
sources={"image_data": "raw_scan"}
|
||||
)
|
||||
|
||||
if ocr_result:
|
||||
# 3. Kép átméretezése (Thumbnail és Standard)
|
||||
target_dir = os.path.join(NAS_BASE_PATH, user.folder_slug, doc.doc_type)
|
||||
# 3. LOGIKA: Vault mentés (NAS izoláció)
|
||||
target_dir = os.path.join(settings.NAS_STORAGE_PATH, user.folder_slug or "common", "vault")
|
||||
os.makedirs(target_dir, exist_ok=True)
|
||||
|
||||
final_path = os.path.join(target_dir, f"{doc.id}.jpg")
|
||||
cls.resize_and_save(doc.temp_path, final_path)
|
||||
|
||||
# 4. Adatbázis frissítése
|
||||
doc.ocr_data = ocr_result
|
||||
doc.file_link = final_path
|
||||
doc.status = "processed"
|
||||
|
||||
# Ideiglenes fájl törlése
|
||||
os.remove(doc.temp_path)
|
||||
|
||||
await db.commit()
|
||||
except Exception as e:
|
||||
logger.error(f"❌ OCR Hiba ({doc.id}): {e}")
|
||||
await db.rollback()
|
||||
final_filename = f"{doc.id}.jpg"
|
||||
final_path = os.path.join(target_dir, final_filename)
|
||||
|
||||
@staticmethod
|
||||
def resize_and_save(source, target):
|
||||
with Image.open(source) as img:
|
||||
img.convert('RGB').save(target, "JPEG", quality=85, optimize=True)
|
||||
# Kép feldolgozása külön szálon, hogy ne blokkolja az Async-et
|
||||
loop = asyncio.get_event_loop()
|
||||
await loop.run_in_executor(None, cls._sync_resize_and_save, temp_path, final_path)
|
||||
|
||||
# 4. LOGIKA: Adatbázis frissítés (Gold Data előkészítés)
|
||||
doc.ocr_data = ocr_result
|
||||
doc.status = "processed"
|
||||
doc.file_size = os.path.getsize(final_path)
|
||||
|
||||
# Ideiglenes fájl takarítása
|
||||
os.remove(temp_path)
|
||||
logger.info(f"✅ Dokumentum sikeresen archiválva: {final_filename}")
|
||||
else:
|
||||
doc.status = "failed"
|
||||
doc.error_log = "AI returned empty result"
|
||||
|
||||
await db.commit()
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"❌ OCR Kritikus Hiba ({doc.id}): {str(e)}")
|
||||
await db.rollback()
|
||||
# Hibás státusz mentése
|
||||
async with AsyncSessionLocal() as error_db:
|
||||
await error_db.execute(
|
||||
update(Document).where(Document.id == doc.id).values(
|
||||
status="failed",
|
||||
error_log=str(e)
|
||||
)
|
||||
)
|
||||
await error_db.commit()
|
||||
|
||||
@classmethod
|
||||
async def run(cls):
|
||||
""" Folyamatos futtatás (Service mode). """
|
||||
logger.info("🤖 Robot 3 (OCR) ONLINE - Figyeli a prémium dokumentumokat")
|
||||
while True:
|
||||
await cls.process_queue()
|
||||
await asyncio.sleep(15) # 15 másodpercenkénti ellenőrzés
|
||||
|
||||
if __name__ == "__main__":
|
||||
asyncio.run(OCRRobot.process_queue())
|
||||
asyncio.run(OCRRobot.run())
|
||||
74
backend/app/workers/osm_scout.py
Normal file
74
backend/app/workers/osm_scout.py
Normal file
@@ -0,0 +1,74 @@
|
||||
# /opt/docker/dev/service_finder/backend/app/workers/osm_scout.py
|
||||
import asyncio
|
||||
import json
|
||||
import httpx
|
||||
import hashlib
|
||||
import logging
|
||||
from urllib.parse import quote
|
||||
from sqlalchemy import select
|
||||
from app.database import AsyncSessionLocal
|
||||
from app.models.staged_data import ServiceStaging
|
||||
|
||||
logger = logging.getLogger("Robot-OSM-Scout")
|
||||
|
||||
class OSMScout:
|
||||
"""
|
||||
Robot: OSM Scout (V2)
|
||||
Feladata: Országos, ingyenes adatgyűjtés az OpenStreetMap hálózatából.
|
||||
"""
|
||||
HUNGARY_BBOX = "45.7,16.1,48.6,22.9"
|
||||
OVERPASS_URL = "http://overpass-api.de/api/interpreter?data="
|
||||
|
||||
@staticmethod
|
||||
def generate_fingerprint(name: str, city: str) -> str:
|
||||
raw = f"{str(name).lower()}|{str(city).lower()}"
|
||||
return hashlib.md5(raw.encode()).hexdigest()
|
||||
|
||||
async def fetch_osm_data(self, query_part: str):
|
||||
query = f'[out:json][timeout:120];(node{query_part}({self.HUNGARY_BBOX});way{query_part}({self.HUNGARY_BBOX}););out center;'
|
||||
async with httpx.AsyncClient(timeout=150) as client:
|
||||
try:
|
||||
resp = await client.get(self.OVERPASS_URL + quote(query))
|
||||
return resp.json().get('elements', []) if resp.status_code == 200 else []
|
||||
except Exception as e:
|
||||
logger.error(f"❌ Overpass hiba: {e}")
|
||||
return []
|
||||
|
||||
async def run(self):
|
||||
logger.info("🛰️ OSM Scout ONLINE - Országos porszívózás indítása...")
|
||||
|
||||
queries = ['["shop"~"car_repair|tyres"]', '["amenity"="car_wash"]']
|
||||
all_elements = []
|
||||
for q in queries:
|
||||
all_elements.extend(await self.fetch_osm_data(q))
|
||||
|
||||
async with AsyncSessionLocal() as db:
|
||||
added = 0
|
||||
for node in all_elements:
|
||||
tags = node.get('tags', {})
|
||||
if not tags.get('name'): continue
|
||||
|
||||
name = tags.get('name', tags.get('operator', 'Ismeretlen'))
|
||||
city = tags.get('addr:city', 'Ismeretlen')
|
||||
f_print = self.generate_fingerprint(name, city)
|
||||
|
||||
# Deduplikáció check
|
||||
stmt = select(ServiceStaging).where(ServiceStaging.fingerprint == f_print)
|
||||
if not (await db.execute(stmt)).scalar():
|
||||
db.add(ServiceStaging(
|
||||
name=name,
|
||||
source="osm_scout_v2",
|
||||
fingerprint=f_print,
|
||||
city=city,
|
||||
full_address=f"{city}, {tags.get('addr:street', '')} {tags.get('addr:housenumber', '')}".strip(", "),
|
||||
status="pending",
|
||||
trust_score=20,
|
||||
raw_data=tags
|
||||
))
|
||||
added += 1
|
||||
|
||||
await db.commit()
|
||||
logger.info(f"✅ OSM Scout végzett. {added} új potenciális szerviz a Stagingben.")
|
||||
|
||||
if __name__ == "__main__":
|
||||
asyncio.run(OSMScout().run())
|
||||
@@ -1,117 +1,137 @@
|
||||
# /opt/docker/dev/service_finder/backend/app/workers/researcher_v2_1.py
|
||||
import asyncio
|
||||
import logging
|
||||
import warnings
|
||||
import os
|
||||
from sqlalchemy import select, update, and_, func, or_, case # Explicit case import
|
||||
from app.db.session import SessionLocal
|
||||
from datetime import datetime, timezone
|
||||
from typing import Optional, List
|
||||
from sqlalchemy import select, update, and_, func, or_, case
|
||||
from app.db.session import AsyncSessionLocal
|
||||
from app.models.vehicle_definitions import VehicleModelDefinition
|
||||
import httpx
|
||||
|
||||
# 1. KRITIKUS JAVÍTÁS: A figyelmeztetések globális elnyomása az import előtt
|
||||
# DuckDuckGo search API hiba-elnyomás és import
|
||||
warnings.filterwarnings("ignore", category=RuntimeWarning, module='duckduckgo_search')
|
||||
from duckduckgo_search import DDGS
|
||||
|
||||
# Logolás beállítása, hogy lássuk a haladást
|
||||
# Logolás beállítása
|
||||
logging.basicConfig(level=logging.INFO, format='%(asctime)s [%(levelname)s] %(name)s: %(message)s')
|
||||
logger = logging.getLogger("Robot-Researcher-v2.1")
|
||||
|
||||
class ResearcherBot:
|
||||
"""
|
||||
Robot 2.1: Az internet porszívója.
|
||||
Technikai adatokat gyűjt (DuckDuckGo), hogy előkészítse az AI dúsítást.
|
||||
Kihasználja a motorkódot és a gyártási évet a pontosabb találatokért.
|
||||
"""
|
||||
def __init__(self):
|
||||
self.batch_size = 15
|
||||
self.max_parallel_queries = 5
|
||||
self.batch_size = 5 # Egyszerre 5 járművet vesz ki
|
||||
self.max_parallel_queries = 3 # Párhuzamos keresések száma
|
||||
|
||||
async def fetch_source(self, label, query):
|
||||
"""Egyedi forrás lekérése a DuckDuckGo-tól."""
|
||||
async def fetch_source(self, label: str, query: str) -> str:
|
||||
""" Egyedi forrás lekérése szálbiztos módon. """
|
||||
try:
|
||||
def search():
|
||||
# Az újabb verziókban a DDGS() hívás így a legstabilabb
|
||||
with DDGS() as ddgs:
|
||||
# Az első 3 találat body részét gyűjtjük be kontextusnak
|
||||
results = ddgs.text(query, max_results=3)
|
||||
return [r['body'] for r in results] if results else []
|
||||
return [f"[{r.get('title', 'No Title')}] {r.get('body', '')}" for r in results] if results else []
|
||||
|
||||
results = await asyncio.to_thread(search)
|
||||
|
||||
if not results:
|
||||
return f"=== SOURCE: {label} | NO DATA FOUND ===\n\n"
|
||||
return f"=== SOURCE: {label} | STATUS: EMPTY ===\n\n"
|
||||
|
||||
content = f"=== SOURCE: {label} | QUERY: {query} ===\n"
|
||||
content += "\n---\n".join(results)
|
||||
content += "\n=== END SOURCE ===\n\n"
|
||||
return content
|
||||
except Exception as e:
|
||||
logger.error(f"❌ Keresési hiba ({label}): {e}")
|
||||
return f"=== SOURCE: {label} ERROR: {str(e)} ===\n\n"
|
||||
logger.error(f"❌ Keresési hiba ({label}): {str(e)}")
|
||||
return f"=== SOURCE: {label} | ERROR: {str(e)} ===\n\n"
|
||||
|
||||
async def research_vehicle(self, vehicle_id):
|
||||
async with SessionLocal() as db:
|
||||
async def research_vehicle(self, vehicle_id: int):
|
||||
""" Egyetlen jármű teljes körű átvilágítása. """
|
||||
async with AsyncSessionLocal() as db:
|
||||
res = await db.execute(select(VehicleModelDefinition).where(VehicleModelDefinition.id == vehicle_id))
|
||||
v = res.scalar_one_or_none()
|
||||
if not v: return
|
||||
|
||||
make, model = v.make, v.marketing_name
|
||||
# Jelöljük be, hogy a kutatás folyamatban van
|
||||
await db.execute(update(VehicleModelDefinition).where(VehicleModelDefinition.id == vehicle_id).values(status='research_in_progress'))
|
||||
make = v.make
|
||||
model = v.marketing_name
|
||||
engine = v.engine_code or ""
|
||||
year = f"{v.year_from}" if v.year_from else ""
|
||||
|
||||
# Státusz zárolása
|
||||
v.status = 'research_in_progress'
|
||||
await db.commit()
|
||||
|
||||
logger.info(f"🔎 Kutatás indul: {make} {model}")
|
||||
logger.info(f"🔎 Kutatás indul: {make} {model} (Motor: {engine}, Év: {year})")
|
||||
|
||||
# Célzott keresési kulcsszavak (Multi-Channel stratégia)
|
||||
queries = [
|
||||
("TECH_SPECS", f"{make} {model} technical specifications engine power"),
|
||||
("MAINTENANCE", f"{make} {model} service manual oil capacity spark plug"),
|
||||
("TIRES_BRAKES", f"{make} {model} tire size brake pad type"),
|
||||
("FLUIDS", f"{make} {model} coolant quantity transmission oil")
|
||||
("TECH_SPECS", f"{make} {model} {engine} {year} technical specifications engine power kw torque"),
|
||||
("MAINTENANCE", f"{make} {model} {engine} oil capacity coolant transmission fluid type capacity"),
|
||||
("TIRES_PROD", f"{make} {model} {year} tire size load index production years status")
|
||||
]
|
||||
|
||||
# Párhuzamos forrásgyűjtés
|
||||
tasks = [self.fetch_source(label, q) for label, q in queries]
|
||||
search_results = await asyncio.gather(*tasks)
|
||||
|
||||
full_context = "".join(search_results)
|
||||
|
||||
async with SessionLocal() as db:
|
||||
await db.execute(
|
||||
update(VehicleModelDefinition)
|
||||
.where(VehicleModelDefinition.id == vehicle_id)
|
||||
.values(
|
||||
raw_search_context=full_context,
|
||||
status='awaiting_ai_synthesis', # Itt adjuk át a Robot 2.2-nek (Alchemist)
|
||||
updated_at=func.now()
|
||||
async with AsyncSessionLocal() as db:
|
||||
if len(full_context.strip()) > 200: # Ha van elegendő kontextus
|
||||
await db.execute(
|
||||
update(VehicleModelDefinition)
|
||||
.where(VehicleModelDefinition.id == vehicle_id)
|
||||
.values(
|
||||
raw_search_context=full_context,
|
||||
status='awaiting_ai_synthesis', # Átadás a Robot 2.2-nek
|
||||
last_research_at=func.now(),
|
||||
attempts=VehicleModelDefinition.attempts + 1
|
||||
)
|
||||
)
|
||||
)
|
||||
logger.info(f"✅ Kontextus rögzítve: {make} {model}")
|
||||
else:
|
||||
# Sikertelen keresés, visszatesszük később
|
||||
await db.execute(
|
||||
update(VehicleModelDefinition)
|
||||
.where(VehicleModelDefinition.id == vehicle_id)
|
||||
.values(
|
||||
status='unverified',
|
||||
attempts=VehicleModelDefinition.attempts + 1,
|
||||
last_research_at=func.now()
|
||||
)
|
||||
)
|
||||
logger.warning(f"⚠️ Kevés adat: {make} {model} - Újrapróbálkozás később")
|
||||
await db.commit()
|
||||
logger.info(f"✅ Kutatás kész, adat a tartályban: {make} {model}")
|
||||
|
||||
async def run(self):
|
||||
logger.info("🚀 Robot 2.1 (Researcher) ONLINE")
|
||||
logger.info("🚀 Robot 2.1 (Researcher) ONLINE - Cél: 407 Toyota feldolgozása")
|
||||
while True:
|
||||
async with SessionLocal() as db:
|
||||
# 2. KRITIKUS JAVÍTÁS: func.case helyett az explicit case() használata
|
||||
# Ez javítja a "TypeError: got an unexpected keyword argument 'else_'" hibát
|
||||
async with AsyncSessionLocal() as db:
|
||||
# Prioritás: unverified autók előre
|
||||
priorities = case(
|
||||
(and_(VehicleModelDefinition.vehicle_type == 'car',
|
||||
VehicleModelDefinition.make.in_(['SUZUKI', 'TOYOTA', 'SKODA', 'VOLKSWAGEN', 'OPEL'])), 1),
|
||||
(VehicleModelDefinition.vehicle_type == 'car', 2),
|
||||
(and_(VehicleModelDefinition.vehicle_type == 'motorcycle',
|
||||
VehicleModelDefinition.make.in_(['HONDA', 'YAMAHA', 'SUZUKI', 'KAWASAKI'])), 3),
|
||||
else_=4
|
||||
(VehicleModelDefinition.make == 'TOYOTA', 1),
|
||||
else_=2
|
||||
)
|
||||
|
||||
stmt = select(VehicleModelDefinition.id).where(
|
||||
or_(VehicleModelDefinition.status == 'unverified', VehicleModelDefinition.status == 'awaiting_research')
|
||||
).order_by(priorities).limit(self.batch_size)
|
||||
or_(VehicleModelDefinition.status == 'unverified',
|
||||
VehicleModelDefinition.status == 'awaiting_research')
|
||||
).order_by(priorities, VehicleModelDefinition.attempts.asc()).limit(self.batch_size)
|
||||
|
||||
res = await db.execute(stmt)
|
||||
ids = [r[0] for r in res.fetchall()]
|
||||
|
||||
if not ids:
|
||||
logger.info("💤 Nincs több feldolgozandó feladat, pihenés...")
|
||||
await asyncio.sleep(60)
|
||||
await asyncio.sleep(30)
|
||||
continue
|
||||
|
||||
# Batch feldolgozás indítása párhuzamosan
|
||||
await asyncio.gather(*[self.research_vehicle(rid) for rid in ids])
|
||||
|
||||
# Rövid szünet a keresőmotorok kímélése érdekében
|
||||
await asyncio.sleep(2)
|
||||
# Szekvenciális feldolgozás a rate-limit miatt
|
||||
for rid in ids:
|
||||
await self.research_vehicle(rid)
|
||||
await asyncio.sleep(5) # 5 másodperc szünet a keresések között
|
||||
|
||||
if __name__ == "__main__":
|
||||
asyncio.run(ResearcherBot().run())
|
||||
@@ -1,83 +1,123 @@
|
||||
# /opt/docker/dev/service_finder/backend/app/workers/robot0_priority_setter.py
|
||||
import asyncio
|
||||
import httpx
|
||||
import logging
|
||||
import os
|
||||
from sqlalchemy import text
|
||||
from app.db.session import SessionLocal
|
||||
from app.db.session import AsyncSessionLocal
|
||||
|
||||
# Logolás beállítása a Sentinel rendszerhez
|
||||
logging.basicConfig(level=logging.INFO, format='%(asctime)s [%(levelname)s]: %(message)s')
|
||||
logger = logging.getLogger("Robot-0-Strategist")
|
||||
|
||||
class Robot0Strategist:
|
||||
"""
|
||||
Robot 0: A Stratéga.
|
||||
Meghatározza a feldolgozási prioritásokat a valós piaci darabszámok alapján.
|
||||
"""
|
||||
RDW_API = "https://opendata.rdw.nl/resource/m9d7-ebf2.json"
|
||||
RDW_TOKEN = os.getenv("RDW_APP_TOKEN")
|
||||
HEADERS = {"X-App-Token": RDW_TOKEN} if RDW_TOKEN else {}
|
||||
|
||||
# Holland típusok leképezése a mi kategóriáinkra a kért sorrendben
|
||||
# Holland típusok leképezése belső kategóriákra (MB 2.0 prioritás)
|
||||
CATEGORIES = [
|
||||
{"name": "car", "rdw_types": ["'Personenauto'"]},
|
||||
{"name": "motorcycle", "rdw_types": ["'Motorfiets'"]},
|
||||
{"name": "truck", "rdw_types": ["'Bedrijfswagen'", "'Vrachtwagen'", "'Opleggertrekker'"]},
|
||||
{"name": "other", "rdw_types": ["NOT IN ('Personenauto', 'Motorfiets', 'Bedrijfswagen', 'Vrachtwagen', 'Opleggertrekker')"]}
|
||||
# JAVÍTVA: Bedrijfsauto hozzáadva, Bedrijfswagen törölve
|
||||
{"name": "truck", "rdw_types": ["'Bedrijfsauto'", "'Vrachtwagen'", "'Opleggertrekker'"]},
|
||||
{"name": "other", "rdw_types": ["NOT IN ('Personenauto', 'Motorfiets', 'Bedrijfsauto', 'Vrachtwagen', 'Opleggertrekker')"]}
|
||||
]
|
||||
|
||||
async def get_popular_makes(self, vehicle_class, rdw_types):
|
||||
"""Lekéri az adott kategória legnépszerűbb márkáit az RDW-től."""
|
||||
# SQL-szerű szűrés az API-n keresztül
|
||||
type_filter = " OR ".join([f"voertuigsoort = {t}" for t in rdw_types])
|
||||
if "NOT IN" in rdw_types[0]: # Speciális kezelés az 'egyéb' kategóriához
|
||||
async def get_popular_makes(self, vehicle_class: str, rdw_types: list):
|
||||
""" Lekéri az adott kategória 500 legnépszerűbb márkáját. """
|
||||
|
||||
# SoQL filter összeállítása
|
||||
if "NOT IN" in rdw_types[0]:
|
||||
type_filter = f"voertuigsoort {rdw_types[0]}"
|
||||
else:
|
||||
type_filter = " OR ".join([f"voertuigsoort = {t}" for t in rdw_types])
|
||||
|
||||
params = {
|
||||
"$select": "merk, count(*)",
|
||||
"$select": "merk, count(*) AS darabszam", # Itt adjuk meg az aliast
|
||||
"$where": type_filter,
|
||||
"$group": "merk",
|
||||
"$order": "count DESC",
|
||||
"$limit": 500 # Kategóriánként az 500 legfontosabb márka bőven elég
|
||||
"$order": "darabszam DESC", # Itt hivatkozunk rá
|
||||
"$limit": 500
|
||||
}
|
||||
|
||||
async with httpx.AsyncClient(timeout=30) as client:
|
||||
async with httpx.AsyncClient(timeout=45.0) as client:
|
||||
try:
|
||||
resp = await client.get(self.RDW_API, params=params, headers=self.HEADERS)
|
||||
if resp.status_code == 200:
|
||||
return resp.json()
|
||||
logger.error(f"⚠️ API Hiba ({vehicle_class}): {resp.status_code}")
|
||||
return []
|
||||
except Exception as e:
|
||||
logger.error(f"❌ Hiba a {vehicle_class} lekérdezésekor: {e}")
|
||||
logger.error(f"❌ Kapcsolati hiba ({vehicle_class}): {e}")
|
||||
return []
|
||||
|
||||
async def run(self):
|
||||
logger.info("🚀 Robot 0 (Strategist) INDUL - Piaci alapú sorrend felállítása...")
|
||||
""" A stratégiai prioritás-beállítás futtatása. """
|
||||
logger.info("🚀 Robot 0 (Strategist) INDUL - Piaci prioritások elemzése...")
|
||||
|
||||
async with SessionLocal() as db:
|
||||
# 1. Töröljük a jelenlegi várólistát, hogy tiszta lappal induljunk (opcionális)
|
||||
# await db.execute(text("DELETE FROM data.catalog_discovery WHERE status = 'pending'"))
|
||||
|
||||
for category in self.CATEGORIES:
|
||||
v_class = category["name"]
|
||||
logger.info(f"📊 {v_class.upper()} kategória elemzése...")
|
||||
|
||||
makes = await self.get_popular_makes(v_class, category["rdw_types"])
|
||||
|
||||
added_count = 0
|
||||
for item in makes:
|
||||
make_name = item.get("merk")
|
||||
if not make_name: continue
|
||||
|
||||
# Beillesztés a Discovery táblába
|
||||
# A prioritást az ID-k sorrendje fogja adni, amit Robot 1 követ
|
||||
await db.execute(text("""
|
||||
INSERT INTO data.catalog_discovery (make, model, vehicle_class, status, source)
|
||||
VALUES (:make, 'ALL_MODELS', :class, 'pending', 'ROBOT-0-POPULARITY')
|
||||
ON CONFLICT (make, model, vehicle_class) DO UPDATE
|
||||
SET status = 'pending' WHERE catalog_discovery.status != 'processed'
|
||||
"""), {"make": make_name.upper(), "class": v_class})
|
||||
added_count += 1
|
||||
|
||||
# --- ÖNGYÓGYÍTÓ ADATBÁZIS JAVÍTÁS ---
|
||||
# Garantáljuk, hogy a priority_score oszlop létezik a táblában
|
||||
async with AsyncSessionLocal() as db:
|
||||
try:
|
||||
await db.execute(text("ALTER TABLE data.catalog_discovery ADD COLUMN IF NOT EXISTS priority_score INTEGER DEFAULT 0;"))
|
||||
await db.commit()
|
||||
logger.info(f"✅ {v_class.upper()}: {added_count} márka sorba állítva a népszerűség alapján.")
|
||||
logger.info("✅ Adatbázis séma ellenőrizve: priority_score oszlop aktív.")
|
||||
except Exception as e:
|
||||
await db.rollback()
|
||||
logger.error(f"⚠️ Nem sikerült ellenőrizni az oszlopot: {e}")
|
||||
# ------------------------------------
|
||||
|
||||
# Nem nyitunk itt globális db-t, hanem a cikluson belül kezeljük
|
||||
for category in self.CATEGORIES:
|
||||
v_class = category["name"]
|
||||
logger.info(f"📊 {v_class.upper()} elemzés és sorbarendezés...")
|
||||
|
||||
makes = await self.get_popular_makes(v_class, category["rdw_types"])
|
||||
|
||||
if not makes:
|
||||
logger.warning(f"⚠️ {v_class.upper()}: Nincs visszaadott adat az RDW-től!")
|
||||
continue
|
||||
|
||||
logger.info("🏁 Robot 0 végzett. A Discovery tábla készen áll a Robot 1 (Hunter) számára!")
|
||||
added_count = 0
|
||||
for item in makes:
|
||||
make_name = str(item.get("merk", "")).upper().strip()
|
||||
if not make_name:
|
||||
continue
|
||||
|
||||
count = int(item.get("darabszam", 0))
|
||||
|
||||
# DEBUG: ellenőrizzük az 'item'-et
|
||||
if added_count == 0:
|
||||
logger.info(f"🧬 Elem felépítése: {item} -> Kinyert márka: {make_name}, Prioritás: {count}")
|
||||
|
||||
# Minden egyes márkához saját session-t nyitunk
|
||||
async with AsyncSessionLocal() as db:
|
||||
try:
|
||||
# JAVÍTÁS: beletettük az attempts (0) és a priority_score (:score) oszlopokat!
|
||||
query = text("""
|
||||
INSERT INTO data.catalog_discovery (make, model, vehicle_class, status, source, attempts, priority_score)
|
||||
VALUES (:make, 'ALL_VARIANTS', :class, 'pending', 'STRATEGIST-POPULARITY-V2', 0, :score)
|
||||
ON CONFLICT (make, model, vehicle_class)
|
||||
DO UPDATE SET status = 'pending', priority_score = :score
|
||||
WHERE catalog_discovery.status NOT IN ('processed', 'in_progress');
|
||||
""")
|
||||
|
||||
# Átadjuk a query-nek a 'score' paramétert is
|
||||
await db.execute(query, {"make": make_name, "class": v_class, "score": count})
|
||||
await db.commit()
|
||||
added_count += 1
|
||||
except Exception as e:
|
||||
await db.rollback()
|
||||
logger.warning(f"❌ Sikertelen rögzítés ({make_name}): {e}")
|
||||
|
||||
logger.info(f"✅ {v_class.upper()} kész: {added_count} márka prioritizálva.")
|
||||
|
||||
logger.info("🏁 Robot 0 végzett. A terep előkészítve a Hunterek számára.")
|
||||
|
||||
if __name__ == "__main__":
|
||||
asyncio.run(Robot0Strategist().run())
|
||||
@@ -1,42 +1,84 @@
|
||||
# /opt/docker/dev/service_finder/backend/app/workers/service_auditor.py
|
||||
import asyncio
|
||||
import logging
|
||||
from app.db.session import SessionLocal
|
||||
from app.models.organization import Organization
|
||||
from app.models.service import ServiceProfile
|
||||
from datetime import datetime, timezone
|
||||
from sqlalchemy import select, and_
|
||||
from app.db.session import AsyncSessionLocal
|
||||
from app.models.organization import Organization, OrgType
|
||||
from app.models.service import ServiceProfile
|
||||
|
||||
logger = logging.getLogger("Robot2-Auditor")
|
||||
# Logolás beállítása a Sentinel rendszerhez
|
||||
logging.basicConfig(level=logging.INFO, format='%(asctime)s [%(levelname)s]: %(message)s')
|
||||
logger = logging.getLogger("Robot-Service-Auditor")
|
||||
|
||||
class ServiceAuditor:
|
||||
"""
|
||||
Robot: Service Auditor.
|
||||
Feladata a meglévő szerviz szolgáltatók adatainak validálása és a megszűnt helyek inaktiválása.
|
||||
"""
|
||||
|
||||
@classmethod
|
||||
async def audit_services(cls):
|
||||
"""Időszakos ellenőrzés a megszűnt helyek kiszűrésére."""
|
||||
async with SessionLocal() as db:
|
||||
# Csak az aktív szervizeket nézzük
|
||||
""" Időszakos ellenőrzés a megszűnt helyek kiszűrésére. """
|
||||
async with AsyncSessionLocal() as db:
|
||||
# 1. LOGIKA: Csak az aktív szerviz típusú szervezeteket keressük
|
||||
stmt = select(Organization).where(
|
||||
and_(Organization.org_type == "service", Organization.is_active == True)
|
||||
and_(
|
||||
Organization.org_type == OrgType.service,
|
||||
Organization.is_active == True
|
||||
)
|
||||
)
|
||||
result = await db.execute(stmt)
|
||||
services = result.scalars().all()
|
||||
|
||||
logger.info(f"🕵️ Audit indítása {len(services)} szerviznél...")
|
||||
|
||||
for service in services:
|
||||
# 1. Ellenőrzés külső forrásnál (API hívás helye)
|
||||
# status = await check_external_status(service.full_name)
|
||||
is_still_open = True # Itt jön az OSM/Google API válasza
|
||||
|
||||
if not is_still_open:
|
||||
service.is_active = False # SOFT-DELETE
|
||||
logger.info(f"⚠️ Szerviz inaktiválva (megszűnt): {service.full_name}")
|
||||
|
||||
# Rate limit védelem
|
||||
await asyncio.sleep(2)
|
||||
|
||||
try:
|
||||
# 2. LOGIKA: Ellenőrzés külső forrásnál (API hívás OSM/Google/Cégtár felé)
|
||||
# Itt futhat le egy külső keresés a név és cím alapján.
|
||||
# Példa: status = await external_api.is_still_operating(service.id)
|
||||
is_still_open = True # Ez a szimulált API válasz
|
||||
|
||||
# 3. LOGIKA: MDM Frissítés
|
||||
stmt_profile = select(ServiceProfile).where(ServiceProfile.organization_id == service.id)
|
||||
profile_res = await db.execute(stmt_profile)
|
||||
profile = profile_res.scalar_one_or_none()
|
||||
|
||||
if not is_still_open:
|
||||
# Soft-delete: a szervezet inaktív lesz, a profil státusza bezárt
|
||||
service.is_active = False
|
||||
if profile:
|
||||
profile.status = 'closed'
|
||||
profile.last_audit_at = datetime.now(timezone.utc)
|
||||
logger.info(f"⚠️ Szerviz inaktiválva (megszűnt): {service.full_name}")
|
||||
else:
|
||||
# Ha nyitva van, csak az audit dátumát frissítjük
|
||||
if profile:
|
||||
profile.last_audit_at = datetime.now(timezone.utc)
|
||||
|
||||
# 4. Rate limit védelem a külső API-k és a DB terhelés kímélése érdekében
|
||||
await asyncio.sleep(1)
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"❌ Hiba a(z) {service.full_name} auditálása közben: {str(e)}")
|
||||
|
||||
# A tranzakció lezárása
|
||||
await db.commit()
|
||||
logger.info("✅ Szerviz-audit folyamat befejeződött.")
|
||||
|
||||
@classmethod
|
||||
async def run_periodic_audit(cls):
|
||||
""" Folyamatos futtatás (Service mode). """
|
||||
while True:
|
||||
logger.info("🕵️ Negyedéves szerviz-audit indítása...")
|
||||
await cls.audit_services()
|
||||
# 90 naponta fusson le teljes körűen
|
||||
await asyncio.sleep(90 * 86400)
|
||||
try:
|
||||
# Alapértelmezett futási ciklus (pl. 90 naponta)
|
||||
await cls.audit_services()
|
||||
logger.info("💤 Auditor robot pihenőre tér (90 nap).")
|
||||
await asyncio.sleep(90 * 86400)
|
||||
except Exception as e:
|
||||
logger.error(f"🚨 Kritikus hiba az Auditor robotban: {e}")
|
||||
await asyncio.sleep(3600) # Hiba esetén 1 óra múlva újrapróbálja
|
||||
|
||||
if __name__ == "__main__":
|
||||
asyncio.run(ServiceAuditor.run_periodic_audit())
|
||||
@@ -1,3 +1,4 @@
|
||||
# /opt/docker/dev/service_finder/backend/app/workers/service_hunter.py
|
||||
import asyncio
|
||||
import httpx
|
||||
import logging
|
||||
@@ -6,156 +7,167 @@ import hashlib
|
||||
from datetime import datetime, timezone
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
from sqlalchemy import select, text, update
|
||||
from app.db.session import SessionLocal
|
||||
from app.models.service import ServiceStaging, DiscoveryParameter
|
||||
from app.db.session import AsyncSessionLocal
|
||||
from app.models.staged_data import ServiceStaging, DiscoveryParameter
|
||||
|
||||
# Naplózás
|
||||
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
|
||||
logger = logging.getLogger("Robot-v1.3.1-ContinentalScout")
|
||||
# Naplózás beállítása a Sentinel monitorozáshoz
|
||||
logging.basicConfig(level=logging.INFO, format='%(asctime)s [%(levelname)s] %(name)s: %(message)s')
|
||||
logger = logging.getLogger("Robot-Continental-Scout-v1.3")
|
||||
|
||||
class ServiceHunter:
|
||||
"""
|
||||
Robot v1.3.1: Continental Scout (Grid Search Edition)
|
||||
- Dinamikus rácsbejárás a sűrű területek lefedésére.
|
||||
- Ujjlenyomat-alapú deduplikáció.
|
||||
- Bővített kulcsszókezelés.
|
||||
Felelőssége: Új szervizpontok felfedezése külső API-k alapján.
|
||||
"""
|
||||
PLACES_NEW_URL = "https://places.googleapis.com/v1/places:searchNearby"
|
||||
GEOCODE_URL = "https://maps.googleapis.com/maps/api/geocode/json"
|
||||
GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")
|
||||
|
||||
@classmethod
|
||||
def generate_fingerprint(cls, name: str, city: str, street: str) -> str:
|
||||
"""Egyedi ujjlenyomat készítése a duplikációk kiszűrésére."""
|
||||
raw_string = f"{str(name).lower()}|{str(city).lower()}|{str(street).lower()[:5]}"
|
||||
return hashlib.md5(raw_string.encode()).hexdigest()
|
||||
def _generate_fingerprint(cls, name: str, city: str, address: str) -> str:
|
||||
"""
|
||||
MD5 Ujjlenyomat generálása.
|
||||
Ez biztosítja, hogy ha ugyanazt a helyet több rács-cellából is megtaláljuk,
|
||||
ne jöjjön létre duplikált rekord.
|
||||
"""
|
||||
raw = f"{str(name).lower()}|{str(city).lower()}|{str(address).lower()[:10]}"
|
||||
return hashlib.md5(raw.encode()).hexdigest()
|
||||
|
||||
@classmethod
|
||||
async def get_city_bounds(cls, city, country_code):
|
||||
"""Város befoglaló téglalapjának (Bounding Box) lekérése Nominatim-al."""
|
||||
async def _get_city_bounds(cls, city: str, country_code: str):
|
||||
""" Nominatim API hívás a város befoglaló téglalapjának lekéréséhez. """
|
||||
url = "https://nominatim.openstreetmap.org/search"
|
||||
params = {"city": city, "country": country_code, "format": "json"}
|
||||
async with httpx.AsyncClient(headers={"User-Agent": "ServiceFinder-Scout/1.0"}) as client:
|
||||
resp = await client.get(url, params=params)
|
||||
if resp.status_code == 200 and resp.json():
|
||||
bbox = resp.json()[0].get("boundingbox") # [min_lat, max_lat, min_lon, max_lon]
|
||||
return [float(x) for x in bbox]
|
||||
headers = {"User-Agent": "ServiceFinder-Scout-v1.3/2.0 (contact@servicefinder.com)"}
|
||||
|
||||
async with httpx.AsyncClient(headers=headers, timeout=10) as client:
|
||||
try:
|
||||
resp = await client.get(url, params=params)
|
||||
if resp.status_code == 200 and resp.json():
|
||||
bbox = resp.json()[0].get("boundingbox") # [min_lat, max_lat, min_lon, max_lon]
|
||||
return [float(x) for x in bbox]
|
||||
except Exception as e:
|
||||
logger.error(f"⚠️ Városhatár lekérdezési hiba ({city}): {e}")
|
||||
return None
|
||||
|
||||
@classmethod
|
||||
async def run_grid_search(cls, db, task):
|
||||
"""Rács-alapú bejárás a városon belül."""
|
||||
bbox = await cls.get_city_bounds(task.city, task.country_code)
|
||||
if not bbox: return
|
||||
async def get_google_places(cls, lat: float, lon: float):
|
||||
""" Google Places V1 (New) API hívás. """
|
||||
if not cls.GOOGLE_API_KEY:
|
||||
logger.error("❌ Google API Key hiányzik!")
|
||||
return []
|
||||
|
||||
headers = {
|
||||
"Content-Type": "application/json",
|
||||
"X-Goog-Api-Key": cls.GOOGLE_API_KEY,
|
||||
"X-Goog-FieldMask": "places.displayName,places.id,places.internationalPhoneNumber,places.websiteUri,places.formattedAddress,places.location"
|
||||
}
|
||||
# MB 2.0 szűrők: Csak releváns típusok
|
||||
payload = {
|
||||
"includedTypes": ["car_repair", "motorcycle_repair", "car_wash", "tire_shop"],
|
||||
"maxResultCount": 20,
|
||||
"locationRestriction": {
|
||||
"circle": {
|
||||
"center": {"latitude": lat, "longitude": lon},
|
||||
"radius": 1200.0 # 1.2km sugarú körök a jó átfedéshez
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
async with httpx.AsyncClient(timeout=15) as client:
|
||||
try:
|
||||
resp = await client.post(cls.PLACES_NEW_URL, json=payload, headers=headers)
|
||||
if resp.status_code == 200:
|
||||
return resp.json().get("places", [])
|
||||
logger.warning(f"Google API hiba: {resp.status_code} - {resp.text}")
|
||||
except Exception as e:
|
||||
logger.error(f"Google API hívás hiba: {e}")
|
||||
return []
|
||||
|
||||
# 1km-es lépések generálása (kb. 0.01 fok)
|
||||
lat_step = 0.015
|
||||
lon_step = 0.02
|
||||
@classmethod
|
||||
async def _save_to_staging(cls, db: AsyncSession, task, p_data: dict):
|
||||
""" Adatmentés a staging táblába deduplikációval. """
|
||||
name = p_data.get('displayName', {}).get('text')
|
||||
addr = p_data.get('formattedAddress', '')
|
||||
f_print = cls._generate_fingerprint(name, task.city, addr)
|
||||
|
||||
# Ellenőrzés, hogy létezik-e már (Ujjlenyomat alapján)
|
||||
stmt = select(ServiceStaging).where(ServiceStaging.fingerprint == f_print)
|
||||
existing = (await db.execute(stmt)).scalar_one_or_none()
|
||||
|
||||
if existing:
|
||||
# Csak a bizalmi pontot és az utolsó észlelést frissítjük
|
||||
existing.trust_score += 2
|
||||
existing.updated_at = datetime.now(timezone.utc)
|
||||
return
|
||||
|
||||
# Új rekord létrehozása
|
||||
new_entry = ServiceStaging(
|
||||
name=name,
|
||||
source="google_scout_v1.3",
|
||||
external_id=p_data.get('id'),
|
||||
fingerprint=f_print,
|
||||
city=task.city,
|
||||
full_address=addr,
|
||||
contact_phone=p_data.get('internationalPhoneNumber'),
|
||||
website=p_data.get('websiteUri'),
|
||||
raw_data=p_data,
|
||||
status="pending",
|
||||
trust_score=30 # Alapértelmezett bizalmi szint
|
||||
)
|
||||
db.add(new_entry)
|
||||
|
||||
@classmethod
|
||||
async def run_grid_search(cls, db: AsyncSession, task: DiscoveryParameter):
|
||||
""" A város koordináta-alapú bejárása. """
|
||||
bbox = await cls._get_city_bounds(task.city, task.country_code or 'HU')
|
||||
if not bbox:
|
||||
return
|
||||
|
||||
# Lépésközök meghatározása (kb. 1km = 0.01 fok)
|
||||
lat_step = 0.012
|
||||
lon_step = 0.018
|
||||
|
||||
curr_lat = bbox[0]
|
||||
while curr_lat < bbox[1]:
|
||||
curr_lon = bbox[2]
|
||||
while curr_lon < bbox[3]:
|
||||
logger.info(f"🛰️ Rács-cella pásztázása: {curr_lat}, {curr_lon} - Kulcsszó: {task.keyword}")
|
||||
places = await cls.get_google_places(curr_lat, curr_lon, task.keyword)
|
||||
logger.info(f"🛰️ Cella pásztázása: {curr_lat:.4f}, {curr_lon:.4f} ({task.city})")
|
||||
|
||||
places = await cls.get_google_places(curr_lat, curr_lon)
|
||||
for p in places:
|
||||
# Adatok kinyerése és tisztítása
|
||||
name = p.get('displayName', {}).get('text')
|
||||
full_addr = p.get('formattedAddress', '')
|
||||
|
||||
# Ujjlenyomat generálás
|
||||
f_print = cls.generate_fingerprint(name, task.city, full_addr)
|
||||
|
||||
await cls.save_to_staging(db, {
|
||||
"external_id": p.get('id'),
|
||||
"name": name,
|
||||
"full_address": full_addr,
|
||||
"phone": p.get('internationalPhoneNumber'),
|
||||
"website": p.get('websiteUri'),
|
||||
"fingerprint": f_print,
|
||||
"city": task.city,
|
||||
"source": "google",
|
||||
"raw": p,
|
||||
"trust": 30
|
||||
})
|
||||
await cls._save_to_staging(db, task, p)
|
||||
|
||||
await db.commit() # Cellánként mentünk, hogy ne vesszen el a munka
|
||||
curr_lon += lon_step
|
||||
await asyncio.sleep(0.5) # API védelem
|
||||
await asyncio.sleep(0.3) # Rate limit védelem
|
||||
curr_lat += lat_step
|
||||
|
||||
@classmethod
|
||||
async def get_google_places(cls, lat, lon, keyword):
|
||||
"""Google Places New API hívás rács-pontra."""
|
||||
if not cls.GOOGLE_API_KEY: return []
|
||||
headers = {
|
||||
"Content-Type": "application/json",
|
||||
"X-Goog-Api-Key": cls.GOOGLE_API_KEY,
|
||||
"X-Goog-FieldMask": "places.displayName,places.id,places.internationalPhoneNumber,places.websiteUri,places.formattedAddress"
|
||||
}
|
||||
payload = {
|
||||
"includedTypes": ["car_repair", "motorcycle_repair"],
|
||||
"maxResultCount": 20,
|
||||
"locationRestriction": {
|
||||
"circle": {
|
||||
"center": {"latitude": lat, "longitude": lon},
|
||||
"radius": 1500.0 # 1.5km sugarú kör a fedés érdekében
|
||||
}
|
||||
}
|
||||
}
|
||||
async with httpx.AsyncClient() as client:
|
||||
resp = await client.post(cls.PLACES_NEW_URL, json=payload, headers=headers)
|
||||
return resp.json().get("places", []) if resp.status_code == 200 else []
|
||||
|
||||
@classmethod
|
||||
async def save_to_staging(cls, db: AsyncSession, data: dict):
|
||||
"""Mentés ujjlenyomat ellenőrzéssel."""
|
||||
# 1. Megnézzük, létezik-e már ez az ujjlenyomat
|
||||
stmt = select(ServiceStaging).where(ServiceStaging.fingerprint == data['fingerprint'])
|
||||
existing = (await db.execute(stmt)).scalar_one_or_none()
|
||||
|
||||
if existing:
|
||||
# Csak a bizalmi pontot növeljük és az utolsó észlelést frissítjük
|
||||
existing.trust_score += 5
|
||||
return
|
||||
|
||||
new_entry = ServiceStaging(
|
||||
name=data['name'],
|
||||
source=data['source'],
|
||||
external_id=str(data['external_id']),
|
||||
fingerprint=data['fingerprint'],
|
||||
city=data['city'],
|
||||
full_address=data['full_address'],
|
||||
contact_phone=data['phone'],
|
||||
website=data['website'],
|
||||
raw_data=data.get('raw', {}),
|
||||
status="pending",
|
||||
trust_score=data.get('trust', 30)
|
||||
)
|
||||
db.add(new_entry)
|
||||
await db.flush()
|
||||
|
||||
@classmethod
|
||||
async def run(cls):
|
||||
logger.info("🤖 Continental Scout v1.3.1 - Grid Engine INDUL...")
|
||||
""" A robot fő hurokfolyamata. """
|
||||
logger.info("🤖 Continental Scout ONLINE - Grid Engine Indul...")
|
||||
while True:
|
||||
async with SessionLocal() as db:
|
||||
async with AsyncSessionLocal() as db:
|
||||
try:
|
||||
await db.execute(text("SET search_path TO data, public"))
|
||||
# Aktív keresési feladatok lekérése
|
||||
stmt = select(DiscoveryParameter).where(DiscoveryParameter.is_active == True)
|
||||
tasks = (await db.execute(stmt)).scalars().all()
|
||||
|
||||
for task in tasks:
|
||||
logger.info(f"🔎 Mélyfúrás indítása: {task.city} -> {task.keyword}")
|
||||
await cls.run_grid_search(db, task)
|
||||
|
||||
task.last_run_at = datetime.now(timezone.utc)
|
||||
await db.commit()
|
||||
# Csak akkor futtatjuk, ha már régen volt (pl. 30 naponta)
|
||||
if not task.last_run_at or (datetime.now(timezone.utc) - task.last_run_at).days >= 30:
|
||||
logger.info(f"🔎 Felderítés indítása: {task.city}")
|
||||
await cls.run_grid_search(db, task)
|
||||
|
||||
task.last_run_at = datetime.now(timezone.utc)
|
||||
await db.commit()
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"💥 Hiba: {e}")
|
||||
logger.error(f"💥 Kritikus hiba a Scout robotban: {e}")
|
||||
await db.rollback()
|
||||
|
||||
await asyncio.sleep(3600)
|
||||
# 6 óránként ellenőrizzük, van-e új feladat
|
||||
await asyncio.sleep(21600)
|
||||
|
||||
if __name__ == "__main__":
|
||||
asyncio.run(ServiceHunter.run())
|
||||
@@ -1,282 +0,0 @@
|
||||
import asyncio
|
||||
import httpx
|
||||
import logging
|
||||
import uuid
|
||||
import os
|
||||
import sys
|
||||
import csv
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
from sqlalchemy import select, text
|
||||
from sqlalchemy.orm import selectinload
|
||||
from app.db.session import SessionLocal
|
||||
|
||||
# Modellek importálása
|
||||
from app.models.service import ServiceProfile, ExpertiseTag
|
||||
from app.models.organization import Organization, OrganizationFinancials, OrgType, OrgUserRole, OrganizationMember
|
||||
from app.models.identity import Person
|
||||
from app.models.address import Address, GeoPostalCode
|
||||
from geoalchemy2.elements import WKTElement
|
||||
from datetime import datetime, timezone
|
||||
|
||||
# Naplózás beállítása
|
||||
logging.basicConfig(level=logging.INFO)
|
||||
logger = logging.getLogger("Robot2-Dunakeszi-Detective")
|
||||
|
||||
class ServiceHunter:
|
||||
"""
|
||||
Robot 2.7.2: Dunakeszi Detective - Deep Model Integration.
|
||||
Logika:
|
||||
1. Helyi CSV (Saját beküldés - Cím alapú Geocoding-al - 50 pont Trust)
|
||||
2. OSM (Közösségi adat - 10 pont Trust)
|
||||
3. Google (Adatpótlás/Fallback - 30 pont Trust)
|
||||
"""
|
||||
OVERPASS_URL = "http://overpass-api.de/api/interpreter"
|
||||
PLACES_NEW_URL = "https://places.googleapis.com/v1/places:searchNearby"
|
||||
GEOCODE_URL = "https://maps.googleapis.com/maps/api/geocode/json"
|
||||
GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")
|
||||
LOCAL_CSV_PATH = "/app/app/workers/local_services.csv"
|
||||
|
||||
@classmethod
|
||||
async def geocode_address(cls, address_text):
|
||||
"""Cím szövegből GPS koordinátát és címkomponenseket csinál."""
|
||||
if not cls.GOOGLE_API_KEY:
|
||||
logger.warning("⚠️ Google API kulcs hiányzik!")
|
||||
return None
|
||||
|
||||
params = {"address": address_text, "key": cls.GOOGLE_API_KEY}
|
||||
try:
|
||||
async with httpx.AsyncClient() as client:
|
||||
resp = await client.get(cls.GEOCODE_URL, params=params, timeout=10)
|
||||
if resp.status_code == 200:
|
||||
data = resp.json()
|
||||
if data.get("results"):
|
||||
result = data["results"][0]
|
||||
loc = result["geometry"]["location"]
|
||||
|
||||
# Címkomponensek kinyerése a kötelező mezőkhöz
|
||||
components = result.get("address_components", [])
|
||||
parsed = {"lat": loc["lat"], "lng": loc["lng"], "zip": "", "city": "", "street": "Ismeretlen", "type": "utca", "number": "1"}
|
||||
|
||||
for c in components:
|
||||
types = c.get("types", [])
|
||||
if "postal_code" in types: parsed["zip"] = c["long_name"]
|
||||
if "locality" in types: parsed["city"] = c["long_name"]
|
||||
if "route" in types: parsed["street"] = c["long_name"]
|
||||
if "street_number" in types: parsed["number"] = c["long_name"]
|
||||
|
||||
logger.info(f"📍 Geocoding sikeres: {address_text}")
|
||||
return parsed
|
||||
else:
|
||||
logger.error(f"❌ Geocoding hiba: {resp.status_code}")
|
||||
except Exception as e:
|
||||
logger.error(f"❌ Geocoding hiba: {e}")
|
||||
return None
|
||||
|
||||
@classmethod
|
||||
async def get_google_place_details_new(cls, lat, lon):
|
||||
"""Google Places API (New) - Adatpótlás FieldMask használatával."""
|
||||
if not cls.GOOGLE_API_KEY:
|
||||
return None
|
||||
|
||||
headers = {
|
||||
"Content-Type": "application/json",
|
||||
"X-Goog-Api-Key": cls.GOOGLE_API_KEY,
|
||||
"X-Goog-FieldMask": "places.displayName,places.id,places.types,places.internationalPhoneNumber,places.websiteUri"
|
||||
}
|
||||
|
||||
payload = {
|
||||
"includedTypes": ["car_repair", "gas_station", "ev_charging_station", "car_wash", "motorcycle_repair"],
|
||||
"maxResultCount": 1,
|
||||
"locationRestriction": {
|
||||
"circle": {
|
||||
"center": {"latitude": lat, "longitude": lon},
|
||||
"radius": 40.0
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
try:
|
||||
async with httpx.AsyncClient() as client:
|
||||
resp = await client.post(cls.PLACES_NEW_URL, json=payload, headers=headers, timeout=10)
|
||||
if resp.status_code == 200:
|
||||
places = resp.json().get("places", [])
|
||||
if places:
|
||||
p = places[0]
|
||||
return {
|
||||
"name": p.get("displayName", {}).get("text"),
|
||||
"google_id": p.get("id"),
|
||||
"types": p.get("types", []),
|
||||
"phone": p.get("internationalPhoneNumber"),
|
||||
"website": p.get("websiteUri")
|
||||
}
|
||||
except Exception as e:
|
||||
logger.error(f"❌ Google kiegészítő hívás hiba: {e}")
|
||||
return None
|
||||
|
||||
@classmethod
|
||||
async def import_local_csv(cls, db: AsyncSession):
|
||||
"""Manuális adatok betöltése CSV-ből."""
|
||||
if not os.path.exists(cls.LOCAL_CSV_PATH):
|
||||
return
|
||||
|
||||
try:
|
||||
with open(cls.LOCAL_CSV_PATH, mode='r', encoding='utf-8') as f:
|
||||
reader = csv.DictReader(f)
|
||||
for row in reader:
|
||||
geo_data = None
|
||||
if row.get('cim'):
|
||||
geo_data = await cls.geocode_address(row['cim'])
|
||||
|
||||
if geo_data:
|
||||
element = {
|
||||
"tags": {
|
||||
"name": row['nev'], "phone": row.get('telefon'),
|
||||
"website": row.get('web'), "amenity": row.get('tipus', 'car_repair'),
|
||||
"addr:full": row.get('cim'),
|
||||
"addr:city": geo_data["city"], "addr:zip": geo_data["zip"],
|
||||
"addr:street": geo_data["street"], "addr:type": geo_data["type"],
|
||||
"addr:number": geo_data["number"]
|
||||
},
|
||||
"lat": geo_data["lat"], "lon": geo_data["lng"]
|
||||
}
|
||||
await cls.save_service_deep(db, element, source="local_manual")
|
||||
logger.info("✅ Helyi CSV adatok feldolgozva.")
|
||||
except Exception as e:
|
||||
logger.error(f"❌ CSV feldolgozási hiba: {e}")
|
||||
|
||||
@classmethod
|
||||
async def get_or_create_person(cls, db: AsyncSession, name: str) -> Person:
|
||||
"""Ghost Person kezelése."""
|
||||
names = name.split(' ', 1)
|
||||
last_name = names[0]
|
||||
first_name = names[1] if len(names) > 1 else "Ismeretlen"
|
||||
stmt = select(Person).where(Person.last_name == last_name, Person.first_name == first_name)
|
||||
result = await db.execute(stmt); person = result.scalar_one_or_none()
|
||||
if not person:
|
||||
person = Person(last_name=last_name, first_name=first_name, is_ghost=True, is_active=False)
|
||||
db.add(person); await db.flush()
|
||||
return person
|
||||
|
||||
@classmethod
|
||||
async def enrich_financials(cls, db: AsyncSession, org_id: int):
|
||||
"""Pénzügyi rekord inicializálása."""
|
||||
financial = OrganizationFinancials(
|
||||
organization_id=org_id, year=datetime.now(timezone.utc).year - 1, source="bot_discovery"
|
||||
)
|
||||
db.add(financial)
|
||||
|
||||
@classmethod
|
||||
async def save_service_deep(cls, db: AsyncSession, element: dict, source="osm"):
|
||||
"""Mély mentés a modelled specifikus mezőneveivel és kötelező értékeivel."""
|
||||
tags = element.get("tags", {})
|
||||
lat, lon = element.get("lat"), element.get("lon")
|
||||
if not lat or not lon: return
|
||||
|
||||
osm_name = tags.get("name") or tags.get("brand") or tags.get("operator")
|
||||
google_data = None
|
||||
if not osm_name or osm_name.lower() in ['aprilia', 'bosch', 'shell', 'mol', 'omv', 'ismeretlen']:
|
||||
google_data = await cls.get_google_place_details_new(lat, lon)
|
||||
|
||||
final_name = (google_data["name"] if google_data else osm_name) or "Ismeretlen Szolgáltató"
|
||||
|
||||
stmt = select(Organization).where(Organization.full_name == final_name)
|
||||
result = await db.execute(stmt); org = result.scalar_one_or_none()
|
||||
|
||||
if not org:
|
||||
# 1. Address létrehozása (a kötelező mezőket kitöltjük az átadott tags-ből vagy alapértékkel)
|
||||
new_addr = Address(
|
||||
latitude=lat,
|
||||
longitude=lon,
|
||||
full_address_text=tags.get("addr:full") or f"2120 Dunakeszi, {tags.get('addr:street', 'Ismeretlen')} {tags.get('addr:housenumber', '1')}",
|
||||
street_name=tags.get("addr:street") or "Ismeretlen",
|
||||
street_type=tags.get("addr:type") or "utca",
|
||||
house_number=tags.get("addr:number") or tags.get("addr:housenumber") or "1"
|
||||
)
|
||||
db.add(new_addr); await db.flush()
|
||||
|
||||
# 2. Organization létrehozása (a modelled alapján ezek a mezők itt vannak)
|
||||
org = Organization(
|
||||
full_name=final_name,
|
||||
name=final_name[:50],
|
||||
org_type=OrgType.service,
|
||||
address_id=new_addr.id,
|
||||
address_city=tags.get("addr:city") or "Dunakeszi",
|
||||
address_zip=tags.get("addr:zip") or "2120",
|
||||
address_street_name=new_addr.street_name,
|
||||
address_street_type=new_addr.street_type,
|
||||
address_house_number=new_addr.house_number
|
||||
)
|
||||
db.add(org); await db.flush()
|
||||
|
||||
# 3. Service Profile
|
||||
trust = 50 if source == "local_manual" else (30 if google_data else 10)
|
||||
spec = {"brands": [], "types": google_data["types"] if google_data else [], "osm_tags": tags}
|
||||
if tags.get("brand"): spec["brands"].append(tags.get("brand"))
|
||||
|
||||
profile = ServiceProfile(
|
||||
organization_id=org.id,
|
||||
location=WKTElement(f'POINT({lon} {lat})', srid=4326),
|
||||
status="ghost",
|
||||
trust_score=trust,
|
||||
google_place_id=google_data["google_id"] if google_data else None,
|
||||
specialization_tags=spec,
|
||||
website=google_data["website"] if google_data else tags.get("website"),
|
||||
contact_phone=google_data["phone"] if google_data else tags.get("phone")
|
||||
)
|
||||
db.add(profile)
|
||||
|
||||
# 4. Tulajdonos rögzítése
|
||||
owner_name = tags.get("operator") or tags.get("contact:person")
|
||||
if owner_name and len(owner_name) > 3:
|
||||
person = await cls.get_or_create_person(db, owner_name)
|
||||
db.add(OrganizationMember(
|
||||
organization_id=org.id,
|
||||
person_id=person.id,
|
||||
role=OrgUserRole.OWNER,
|
||||
is_verified=False
|
||||
))
|
||||
|
||||
await cls.enrich_financials(db, org.id)
|
||||
await db.flush()
|
||||
logger.info(f"✨ [{source.upper()}] Mentve: {final_name} (Bizalom: {trust})")
|
||||
|
||||
@classmethod
|
||||
async def run(cls):
|
||||
logger.info("🤖 Robot 2.7.2: Dunakeszi Detective indítása...")
|
||||
|
||||
# Kapcsolódási védelem
|
||||
connected = False
|
||||
while not connected:
|
||||
try:
|
||||
async with SessionLocal() as db:
|
||||
await db.execute(text("SELECT 1"))
|
||||
connected = True
|
||||
except Exception as e:
|
||||
logger.warning(f"⏳ Várakozás a hálózatra (shared-postgres host?): {e}")
|
||||
await asyncio.sleep(5)
|
||||
|
||||
while True:
|
||||
async with SessionLocal() as db:
|
||||
try:
|
||||
await db.execute(text("SET search_path TO data, public"))
|
||||
# 1. Beküldött CSV feldolgozása (Geocoding-al)
|
||||
await cls.import_local_csv(db)
|
||||
await db.commit()
|
||||
|
||||
# 2. OSM Szkennelés
|
||||
query = """[out:json][timeout:120];area["name"="Dunakeszi"]->.city;(nwr["shop"~"car_repair|motorcycle_repair|tyres|car_parts|motorcycle"](area.city);nwr["amenity"~"car_repair|vehicle_inspection|motorcycle_repair|fuel|charging_station|car_wash"](area.city);nwr["amenity"~"car_repair|fuel|charging_station"](around:5000, 47.63, 19.13););out center;"""
|
||||
async with httpx.AsyncClient() as client:
|
||||
resp = await client.post(cls.OVERPASS_URL, data={"data": query}, timeout=120)
|
||||
if resp.status_code == 200:
|
||||
elements = resp.json().get("elements", [])
|
||||
for el in elements:
|
||||
await cls.save_service_deep(db, el, source="osm")
|
||||
await db.commit()
|
||||
except Exception as e:
|
||||
logger.error(f"❌ Futáshiba: {e}")
|
||||
|
||||
logger.info("😴 Scan kész, 24 óra pihenő...")
|
||||
await asyncio.sleep(86400)
|
||||
|
||||
if __name__ == "__main__":
|
||||
asyncio.run(ServiceHunter.run())
|
||||
@@ -1,125 +1,129 @@
|
||||
# /opt/docker/dev/service_finder/backend/app/workers/technical_enricher.py
|
||||
import asyncio
|
||||
import httpx
|
||||
import logging
|
||||
import os
|
||||
import datetime
|
||||
import random
|
||||
import sys
|
||||
from sqlalchemy import select, and_, update, text, func
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
from app.db.session import SessionLocal
|
||||
# JAVÍTVA: case hozzáadva az importhoz
|
||||
from sqlalchemy import select, and_, update, text, func, case
|
||||
from app.db.session import AsyncSessionLocal
|
||||
from app.models.vehicle_definitions import VehicleModelDefinition
|
||||
from app.models.asset import AssetCatalog
|
||||
from app.services.ai_service import AIService
|
||||
from duckduckgo_search import DDGS
|
||||
|
||||
# --- SZIGORÚ NAPLÓZÁS KONFIGURÁCIÓ ---
|
||||
for handler in logging.root.handlers[:]:
|
||||
logging.root.removeHandler(handler)
|
||||
|
||||
# --- SZIGORÚ NAPLÓZÁS ---
|
||||
logging.basicConfig(
|
||||
level=logging.INFO,
|
||||
format='%(asctime)s.%(msecs)03d [%(levelname)s] Alchemist: %(message)s',
|
||||
datefmt='%Y-%m-%d %H:%M:%S',
|
||||
format='%(asctime)s [%(levelname)s] Alchemist-v1.3: %(message)s',
|
||||
stream=sys.stdout
|
||||
)
|
||||
logger = logging.getLogger("Robot-Enricher-v1.3.0")
|
||||
logger = logging.getLogger("Robot-Enricher")
|
||||
|
||||
class TechEnricher:
|
||||
"""
|
||||
Industrial TechEnricher v1.3.0
|
||||
- Fix: Deadlock elkerülése izolált session-kezeléssel.
|
||||
- Logika: Napi 500 AI hívás, Smart Merge, Web Fallback.
|
||||
Industrial TechEnricher (Alchemist Bot).
|
||||
Felelős az MDM (Master Data Management) 'Arany' rekordjainak előállításáért.
|
||||
"""
|
||||
|
||||
def __init__(self):
|
||||
self.max_attempts = 5
|
||||
self.batch_size = 15
|
||||
self.batch_size = 10
|
||||
self.daily_ai_limit = 500
|
||||
self.ai_calls_today = 0
|
||||
self.last_reset_date = datetime.date.today()
|
||||
|
||||
def check_budget(self) -> bool:
|
||||
""" Ellenőrzi, hogy beleférünk-e még a napi AI keretbe. """
|
||||
if datetime.date.today() > self.last_reset_date:
|
||||
self.ai_calls_today = 0
|
||||
self.last_reset_date = datetime.date.today()
|
||||
return self.ai_calls_today < self.daily_ai_limit
|
||||
|
||||
def is_data_sane(self, data: dict) -> bool:
|
||||
""" Technikai józansági vizsgálat (Hallucináció elleni védelem). """
|
||||
try:
|
||||
if not data: return False
|
||||
ccm = int(data.get("ccm", 0) or 0)
|
||||
kw = int(data.get("kw", 0) or 0)
|
||||
if ccm > 15000 or kw > 2000: return False
|
||||
# Extrém értékek szűrése (pl. nem létezik 20 literes személyautó motor)
|
||||
if ccm > 16000 or (kw > 1500 and data.get("vehicle_type") != "truck"):
|
||||
return False
|
||||
return True
|
||||
except: return False
|
||||
|
||||
async def get_web_wisdom(self, make: str, model: str) -> str:
|
||||
"""Keresés a neten izolált szálon (nem blokkolja az aszinkron loopot)."""
|
||||
query = f"{make} {model} technical specs maintenance oil qty tire size"
|
||||
""" Ha az AI bizonytalan, ez a funkció gyűjt kontextust a netről. """
|
||||
query = f"{make} {model} technical specifications oil capacity engine code"
|
||||
try:
|
||||
def sync_search():
|
||||
with DDGS() as ddgs:
|
||||
return "\n".join([r['body'] for r in ddgs.text(query, max_results=3)])
|
||||
# Az első 3 találat body részét gyűjtjük össze
|
||||
results = ddgs.text(query, max_results=3)
|
||||
return "\n".join([r['body'] for r in results]) if results else ""
|
||||
|
||||
return await asyncio.to_thread(sync_search)
|
||||
except Exception as e:
|
||||
logger.warning(f"🌐 Web hiba ({make}): {e}")
|
||||
logger.warning(f"🌐 Web Search Error ({make}): {e}")
|
||||
return ""
|
||||
|
||||
async def process_single_record(self, record_id: int):
|
||||
"""
|
||||
Egyetlen rekord dúsítása izolált folyamatban.
|
||||
Logika: Read -> AI Process -> Write Merge.
|
||||
"""
|
||||
Dúsítási folyamat 3 szigorúan elválasztott lépésben a fagyás ellen:
|
||||
1. Adat lekérése és DB bezárása.
|
||||
2. AI munka (DB nélkül).
|
||||
3. Mentés új sessionben.
|
||||
"""
|
||||
# --- 1. LÉPÉS: ADAT LEKÉRÉSE ---
|
||||
async with SessionLocal() as db:
|
||||
stmt = select(VehicleModelDefinition).where(VehicleModelDefinition.id == record_id)
|
||||
res = await db.execute(stmt)
|
||||
# 1. ADAT LEKÉRÉSE
|
||||
async with AsyncSessionLocal() as db:
|
||||
res = await db.execute(select(VehicleModelDefinition).where(VehicleModelDefinition.id == record_id))
|
||||
rec = res.scalar_one_or_none()
|
||||
if not rec: return
|
||||
make, m_name, v_type = rec.make, rec.marketing_name, (rec.vehicle_type or "car")
|
||||
logger.info(f"🧪 >>> Dúsítás indítása: {make} {m_name}")
|
||||
|
||||
# --- 2. LÉPÉS: AI MUNKA (DB session itt nincs nyitva!) ---
|
||||
# 2. AI FELDOLGOZÁS (DB kapcsolat nélkül!)
|
||||
try:
|
||||
# AIService hívása a kötelező 4. 'sources' paraméterrel
|
||||
# Elsődleges kísérlet a belső tudásbázis alapján
|
||||
ai_data = await AIService.get_clean_vehicle_data(make, m_name, v_type, {})
|
||||
|
||||
# Ha az AI bizonytalan, indítunk egy webes mélyfúrást
|
||||
if not ai_data or not ai_data.get("kw"):
|
||||
logger.info(f"🔍 AI bizonytalan, webes dúsítás indul: {make} {m_name}")
|
||||
logger.info(f"🔍 AI bizonytalan, Web-Context hívása: {make} {m_name}")
|
||||
web_info = await self.get_web_wisdom(make, m_name)
|
||||
ai_data = await AIService.get_clean_vehicle_data(make, m_name, v_type, {"web_context": web_info})
|
||||
|
||||
if not ai_data: raise ValueError("Az AI nem adott értékelhető választ.")
|
||||
if not ai_data or not self.is_data_sane(ai_data):
|
||||
raise ValueError("Hibás vagy hiányos AI válasz.")
|
||||
|
||||
# --- 3. LÉPÉS: MENTÉS (Új session nyitása) ---
|
||||
async with SessionLocal() as db:
|
||||
# MDM (AssetCatalog) Smart Merge
|
||||
# 3. MENTÉS ÉS MERGE (Új session)
|
||||
async with AsyncSessionLocal() as db:
|
||||
# MDM Összefésülés: létezik-e már ez a variáns a katalógusban?
|
||||
clean_model = str(ai_data.get("marketing_name", m_name))[:50].upper()
|
||||
cat_stmt = select(AssetCatalog).where(and_(
|
||||
AssetCatalog.make == make.upper(),
|
||||
AssetCatalog.model == ai_data.get("marketing_name", m_name)[:50],
|
||||
AssetCatalog.model == clean_model,
|
||||
AssetCatalog.power_kw == ai_data.get("kw")
|
||||
)).limit(1)
|
||||
|
||||
if not (await db.execute(cat_stmt)).scalar_one_or_none():
|
||||
existing_cat = (await db.execute(cat_stmt)).scalar_one_or_none()
|
||||
|
||||
if not existing_cat:
|
||||
db.add(AssetCatalog(
|
||||
make=make.upper(),
|
||||
model=ai_data.get("marketing_name", m_name)[:50],
|
||||
model=clean_model,
|
||||
power_kw=ai_data.get("kw"),
|
||||
engine_capacity=ai_data.get("ccm"),
|
||||
factory_data=ai_data
|
||||
fuel_type=ai_data.get("fuel_type", "petrol"),
|
||||
factory_data=ai_data # Teljes technikai JSONB (olaj, gumi, stb.)
|
||||
))
|
||||
logger.info(f"✅ Mentve az MDM-be: {make} {m_name}")
|
||||
logger.info(f"✨ ÚJ KATALÓGUS ELEM (Gold Data): {make} {clean_model}")
|
||||
|
||||
# Staging frissítése
|
||||
# Staging (Discovery) állapot frissítése
|
||||
await db.execute(
|
||||
update(VehicleModelDefinition)
|
||||
.where(VehicleModelDefinition.id == record_id)
|
||||
.values(
|
||||
status="ai_enriched",
|
||||
technical_code=ai_data.get("technical_code") or f"GEN-{record_id}",
|
||||
technical_code=ai_data.get("technical_code") or f"REF-{record_id}",
|
||||
engine_capacity=ai_data.get("ccm"),
|
||||
power_kw=ai_data.get("kw"),
|
||||
updated_at=func.now()
|
||||
@@ -130,37 +134,50 @@ class TechEnricher:
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"🚨 Hiba a(z) {record_id} rekordnál: {e}")
|
||||
async with SessionLocal() as db:
|
||||
await db.execute(update(VehicleModelDefinition).where(VehicleModelDefinition.id == record_id).values(
|
||||
attempts=VehicleModelDefinition.attempts + 1,
|
||||
last_error=str(e)[:200],
|
||||
status=text("CASE WHEN attempts >= 4 THEN 'suspended' ELSE 'unverified' END"),
|
||||
updated_at=func.now()
|
||||
))
|
||||
async with AsyncSessionLocal() as db:
|
||||
# Hibakezelés: ha sokszor bukik el, felfüggesztjük a rekordot
|
||||
await db.execute(
|
||||
update(VehicleModelDefinition)
|
||||
.where(VehicleModelDefinition.id == record_id)
|
||||
.values(
|
||||
attempts=VehicleModelDefinition.attempts + 1,
|
||||
last_error=str(e)[:200],
|
||||
status=case(
|
||||
(VehicleModelDefinition.attempts >= 4, "suspended"),
|
||||
else_="unverified"
|
||||
),
|
||||
updated_at=func.now()
|
||||
)
|
||||
)
|
||||
await db.commit()
|
||||
|
||||
async def run(self):
|
||||
logger.info(f"🚀 Robot 2 v1.3.0 ONLINE (Limit: {self.daily_ai_limit})")
|
||||
logger.info(f"🚀 Alchemist Robot v1.3.0 ONLINE (Napi keret: {self.daily_ai_limit})")
|
||||
while True:
|
||||
if not self.check_budget():
|
||||
logger.warning("💰 AI költségkeret kimerült mára. Alvás 1 órát.")
|
||||
await asyncio.sleep(3600); continue
|
||||
|
||||
try:
|
||||
async with SessionLocal() as db:
|
||||
# Csak az ID-kat kérjük le, hogy ne tartsuk nyitva a session-t a dúsítás alatt
|
||||
async with AsyncSessionLocal() as db:
|
||||
# Olyan rekordokat keresünk, amik még nincsenek dúsítva és nincs túl sok hiba rajtuk
|
||||
stmt = select(VehicleModelDefinition.id).where(and_(
|
||||
VehicleModelDefinition.status == "unverified",
|
||||
VehicleModelDefinition.attempts < self.max_attempts
|
||||
)).limit(self.batch_size)
|
||||
ids = [r[0] for r in (await db.execute(stmt)).fetchall()]
|
||||
|
||||
# JAVÍTVA: Fetchall és list comprehension
|
||||
res = await db.execute(stmt)
|
||||
ids = [r[0] for r in res.fetchall()]
|
||||
|
||||
if not ids:
|
||||
await asyncio.sleep(60); continue
|
||||
|
||||
logger.info(f"📦 Batch indul: {len(ids)} rekord.")
|
||||
logger.info(f"📦 Batch feldolgozása indul: {len(ids)} tétel.")
|
||||
for rid in ids:
|
||||
await self.process_single_record(rid)
|
||||
await asyncio.sleep(random.uniform(10.0, 30.0)) # VGA kímélése
|
||||
# VGA kímélése és API rate-limit védelem
|
||||
await asyncio.sleep(random.uniform(5.0, 15.0))
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"🚨 Főciklus hiba: {e}")
|
||||
|
||||
Reference in New Issue
Block a user