Cleanup: MB 2.0 Gap Analysis előtti állapot (adatok kizárva)

This commit is contained in:
2026-02-23 09:44:02 +01:00
parent 5757754aae
commit 893f39fa15
74 changed files with 34239 additions and 2834 deletions

View File

@@ -2,159 +2,159 @@ import asyncio
import httpx
import logging
import os
import hashlib
from datetime import datetime, timezone
from sqlalchemy.ext.asyncio import AsyncSession
from sqlalchemy import select, text
from sqlalchemy import select, text, update
from app.db.session import SessionLocal
# Modellek - Az új v1.3 struktúra
from app.models.service import ServiceStaging, DiscoveryParameter
# Naplózás
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
logger = logging.getLogger("Robot-v1.3-ContinentalScout")
logger = logging.getLogger("Robot-v1.3.1-ContinentalScout")
class ServiceHunter:
"""
Robot v1.3.0: Continental Scout.
EU-level discovery engine, driven by the Discovery table.
Robot v1.3.1: Continental Scout (Grid Search Edition)
- Dynamic grid traversal to cover dense areas.
- Fingerprint-based deduplication.
- Extended keyword handling.
"""
# NOTE(review): the docstring above carries both the v1.3.0 and the v1.3.1
# description; presumably only one of them is current - confirm.
# Remote endpoints used by the methods of this class.
OVERPASS_URL = "http://overpass-api.de/api/interpreter"
PLACES_NEW_URL = "https://places.googleapis.com/v1/places:searchNearby"
GEOCODE_URL = "https://maps.googleapis.com/maps/api/geocode/json"
# Read from the environment once, when the class body is evaluated;
# None when the variable is not set.
GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")
@classmethod
async def get_coordinates(cls, city, country_code):
    """Look up the centre of a city, used as the starting point of a search.

    Sends ``"<city>, <country_code>"`` as the ``address`` parameter (together
    with ``cls.GOOGLE_API_KEY`` as ``key``) to ``cls.GEOCODE_URL``.

    Returns:
        A ``(lat, lng)`` tuple taken from the first result's
        ``geometry.location``, or ``(None, None)`` when the response status
        is not 200 or the ``results`` entry is missing or empty.
    """
    query = {"address": f"{city}, {country_code}", "key": cls.GOOGLE_API_KEY}
    async with httpx.AsyncClient() as http:
        reply = await http.get(cls.GEOCODE_URL, params=query)
        if reply.status_code != 200:
            return None, None
        hits = reply.json().get("results")
        if not hits:
            return None, None
        # Only the first (best) match is used.
        location = hits[0]["geometry"]["location"]
        return location["lat"], location["lng"]
def generate_fingerprint(cls, name: str, city: str, street: str) -> str:
    """Build a fingerprint used to filter out duplicate entries.

    The name, the city and the first five characters of the street are
    converted with ``str()``, lower-cased and joined with ``|``. The MD5 hex
    digest of that string is returned. The first parameter is not used.
    """
    parts = (
        str(name).lower(),
        str(city).lower(),
        # Only a short street prefix takes part in the fingerprint.
        str(street).lower()[:5],
    )
    digest = hashlib.md5("|".join(parts).encode())
    return digest.hexdigest()
@classmethod
async def get_city_bounds(cls, city, country_code):
    """Fetch the bounding box of a city from Nominatim.

    ``city`` and ``country_code`` are sent as the ``city`` and ``country``
    query parameters of the Nominatim search endpoint, asking for JSON.

    Returns:
        ``[min_lat, max_lat, min_lon, max_lon]`` as floats, taken from the
        first search result, or ``None`` when the response status is not
        200, the result list is empty, or the first result carries no
        ``boundingbox`` entry.
    """
    url = "https://nominatim.openstreetmap.org/search"
    params = {"city": city, "country": country_code, "format": "json"}
    async with httpx.AsyncClient(headers={"User-Agent": "ServiceFinder-Scout/1.0"}) as client:
        resp = await client.get(url, params=params)
        if resp.status_code != 200:
            return None
        # Decode the body once instead of once per access.
        results = resp.json()
        if not results:
            return None
        bbox = results[0].get("boundingbox")  # [min_lat, max_lat, min_lon, max_lon]
        if not bbox:
            # A hit without a bounding box is treated like "no result",
            # rather than failing on iteration over None.
            return None
        return [float(x) for x in bbox]
@classmethod
async def run_grid_search(cls, db, task):
"""Grid-based sweep inside a city.

Fetches the bounding box for ``task.city`` / ``task.country_code`` via
``get_city_bounds``, walks it in fixed latitude/longitude steps, queries
``get_google_places`` at every grid point and hands each returned place,
together with a generated fingerprint, to ``save_to_staging``.
Returns early when no bounding box is available.
"""
bbox = await cls.get_city_bounds(task.city, task.country_code)
if not bbox: return
# Grid step sizes, in degrees.
# NOTE(review): the original comment described these as 1 km steps
# (about 0.01 degree), but the values below are 0.015 and 0.02 - confirm
# which one is intended.
lat_step = 0.015
lon_step = 0.02
# bbox is indexed as [min_lat, max_lat, min_lon, max_lon] (see get_city_bounds).
curr_lat = bbox[0]
while curr_lat < bbox[1]:
curr_lon = bbox[2]
while curr_lon < bbox[3]:
logger.info(f"🛰️ Rács-cella pásztázása: {curr_lat}, {curr_lon} - Kulcsszó: {task.keyword}")
places = await cls.get_google_places(curr_lat, curr_lon, task.keyword)
for p in places:
# Extract and clean the data
name = p.get('displayName', {}).get('text')
full_addr = p.get('formattedAddress', '')
# Generate the fingerprint; note that the full formatted address is
# passed in the position of the `street` argument.
f_print = cls.generate_fingerprint(name, task.city, full_addr)
await cls.save_to_staging(db, {
"external_id": p.get('id'),
"name": name,
"full_address": full_addr,
"phone": p.get('internationalPhoneNumber'),
"website": p.get('websiteUri'),
"fingerprint": f_print,
"city": task.city,
"source": "google",
"raw": p,
"trust": 30
})
curr_lon += lon_step
await asyncio.sleep(0.5) # API protection (pause between requests)
curr_lat += lat_step
@classmethod
async def get_google_places(cls, lat, lon, keyword):
"""Google Places New API - fixed, with protection against 400 errors.

Posts a nearby-search request to ``cls.PLACES_NEW_URL`` for a circle
centred on ``(lat, lon)`` and returns the ``places`` list of the JSON
response when the status is 200, otherwise an empty list. Returns an
empty list without any request when ``cls.GOOGLE_API_KEY`` is not set.
``keyword`` is not referenced in the visible body.
"""
# The next line is a second description string (Hungarian: "Google Places
# New API call for a grid point").
# NOTE(review): this span appears to contain both the old and the new lines
# of a change (repeated "X-Goog-FieldMask", "includedTypes" and "radius"
# entries, two return paths) - confirm which version is current.
"""Google Places New API hívás rács-pontra."""
if not cls.GOOGLE_API_KEY: return []
headers = {
"Content-Type": "application/json",
"X-Goog-Api-Key": cls.GOOGLE_API_KEY,
"X-Goog-FieldMask": "places.displayName,places.id,places.types,places.internationalPhoneNumber,places.websiteUri,places.formattedAddress"
"X-Goog-FieldMask": "places.displayName,places.id,places.internationalPhoneNumber,places.websiteUri,places.formattedAddress"
}
# The 'keyword' would belong to a TextQuery; SearchNearby uses types (includedTypes) instead.
# EU-level trick: when there is no exact type, 'car_repair' is the default.
payload = {
"includedTypes": ["car_repair", "gas_station", "car_wash", "motorcycle_repair"],
"includedTypes": ["car_repair", "motorcycle_repair"],
"maxResultCount": 20,
"locationRestriction": {
"circle": {
"center": {"latitude": lat, "longitude": lon},
"radius": 5000.0 # 5 km radius
"radius": 1500.0 # circle of 1.5 km radius, for coverage
}
}
}
async with httpx.AsyncClient() as client:
resp = await client.post(cls.PLACES_NEW_URL, json=payload, headers=headers)
if resp.status_code == 200:
return resp.json().get("places", [])
else:
logger.error(f"❌ Google API hiba ({resp.status_code}): {resp.text}")
return []
return resp.json().get("places", []) if resp.status_code == 200 else []
@classmethod
async def save_to_staging(cls, db: AsyncSession, data: dict):
"""Save into the Staging table with a 9-field breakdown.

Looks for an existing ``ServiceStaging`` row and, when none is found,
adds a new row with status ``"pending"`` built from ``data`` and flushes
the session. Nothing is committed here.
"""
# NOTE(review): this span appears to contain both the old and the new lines
# of a change (lookup by external_id and by fingerprint, repeated keyword
# arguments in the constructor call) - confirm which version is current.
stmt = select(ServiceStaging).where(ServiceStaging.external_id == str(data['external_id']))
if (await db.execute(stmt)).scalar_one_or_none(): return
# The next line is a second description string (Hungarian: "Save with
# fingerprint check").
"""Mentés ujjlenyomat ellenőrzéssel."""
# 1. Check whether this fingerprint already exists
stmt = select(ServiceStaging).where(ServiceStaging.fingerprint == data['fingerprint'])
existing = (await db.execute(stmt)).scalar_one_or_none()
if existing:
# Only bump the trust score and refresh the last-seen time.
# NOTE(review): only trust_score is modified below; no last-seen field
# is updated in the visible code.
existing.trust_score += 5
return
new_entry = ServiceStaging(
name=data['name'],
source=data['source'],
external_id=str(data['external_id']),
# The 9-field breakdown happens here (when the data is provided)
postal_code=data.get('zip'),
city=data.get('city'),
street_name=data.get('street'),
street_type=data.get('street_type', 'utca'),
house_number=data.get('number'),
full_address=data.get('full_address'),
contact_phone=data.get('phone'),
website=data.get('website'),
fingerprint=data['fingerprint'],
city=data['city'],
full_address=data['full_address'],
contact_phone=data['phone'],
website=data['website'],
raw_data=data.get('raw', {}),
status="pending",
trust_score=data.get('trust', 10)
trust_score=data.get('trust', 30)
)
db.add(new_entry)
# Flush so the pending row is sent to the database within the current transaction.
await db.flush()
@classmethod
async def run(cls):
"""Endless worker loop of the robot.

Each pass opens a session from ``SessionLocal``, sets the search path to
``data, public``, loads the ``DiscoveryParameter`` rows whose
``is_active`` flag is true and processes them. Results are staged through
``save_to_staging``; ``last_run_at`` is set on the task and the session is
committed. Any exception is logged and the session is rolled back. The
loop then sleeps for 3600 seconds.

NOTE(review): this span appears to contain both the old and the new lines
of a change (the grid search call as well as the former Google / OSM
phases, and pairs of log lines) - confirm which version is current.
"""
logger.info("🤖 Robot v1.3.0: Continental Scout elindult...")
logger.info("🤖 Continental Scout v1.3.1 - Grid Engine INDUL...")
while True:
async with SessionLocal() as db:
try:
await db.execute(text("SET search_path TO data, public"))
# 1. Fetch the parameters from the table
stmt = select(DiscoveryParameter).where(DiscoveryParameter.is_active == True)
tasks = (await db.execute(stmt)).scalars().all()
for task in tasks:
logger.info(f"🔎 Felderítés: {task.city} ({task.country_code}) -> {task.keyword}")
logger.info(f"🔎 Mélyfúrás indítása: {task.city} -> {task.keyword}")
await cls.run_grid_search(db, task)
# Obtain the coordinates for the search
lat, lon = await cls.get_coordinates(task.city, task.country_code)
# Skip the task when no latitude was returned (any falsy value skips).
if not lat: continue
# --- GOOGLE PHASE ---
google_places = await cls.get_google_places(lat, lon, task.keyword)
for p in google_places:
await cls.save_to_staging(db, {
"external_id": p.get('id'),
"name": p.get('displayName', {}).get('text'),
"full_address": p.get('formattedAddress'),
"phone": p.get('internationalPhoneNumber'),
"website": p.get('websiteUri'),
"source": "google",
"raw": p,
"trust": 30
})
# --- OSM PHASE (EU-compatible query) ---
osm_query = f"""[out:json][timeout:60];
(nwr["amenity"~"car_repair|fuel"](around:5000, {lat}, {lon}););
out center;"""
async with httpx.AsyncClient() as client:
resp = await client.post(cls.OVERPASS_URL, data={"data": osm_query})
if resp.status_code == 200:
for el in resp.json().get("elements", []):
t = el.get("tags", {})
# OSM ids are prefixed with "osm_" when used as external_id.
await cls.save_to_staging(db, {
"external_id": f"osm_{el['id']}",
"name": t.get('name', 'Ismeretlen szerviz'),
"city": t.get('addr:city', task.city),
"zip": t.get('addr:postcode'),
"street": t.get('addr:street'),
"number": t.get('addr:housenumber'),
"source": "osm",
"raw": el,
"trust": 15
})
# Record the time of this run as a timezone-aware UTC timestamp.
task.last_run_at = datetime.now(timezone.utc)
await db.commit()
logger.info(f"{task.city} felderítve.")
except Exception as e:
logger.error(f"💥 Kritikus hiba a ciklusban: {e}")
logger.error(f"💥 Hiba: {e}")
await db.rollback()
logger.info("😴 Minden aktív feladat kész. Alvás 1 órán át...")
await asyncio.sleep(3600)
if __name__ == "__main__":