import asyncio
import hashlib
import logging
import os
from datetime import datetime, timezone
from typing import List, Optional

import httpx
from sqlalchemy import select, text, update
from sqlalchemy.ext.asyncio import AsyncSession

from app.db.session import SessionLocal
from app.models.service import DiscoveryParameter, ServiceStaging

# Logging setup.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
)
logger = logging.getLogger("Robot-v1.3.1-ContinentalScout")


class ServiceHunter:
    """Robot v1.3.1: Continental Scout (Grid Search Edition).

    - Walks a grid over a city's bounding box so dense areas are covered.
    - De-duplicates staged records by fingerprint.
    - Reads its work items from active ``DiscoveryParameter`` rows.
    """

    PLACES_NEW_URL = "https://places.googleapis.com/v1/places:searchNearby"
    GEOCODE_URL = "https://maps.googleapis.com/maps/api/geocode/json"
    GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")

    @classmethod
    def generate_fingerprint(cls, name: str, city: str, street: str) -> str:
        """Return an MD5 hex digest used to detect duplicate records.

        The digest is built from the lower-cased ``name`` and ``city`` plus
        the first five characters of the lower-cased ``street``. Every
        argument is passed through ``str()``, so ``None`` contributes the
        text ``"none"``.
        """
        # MD5 is used only as a compact dedup key here, not for security.
        raw_string = (
            f"{str(name).lower()}|{str(city).lower()}|{str(street).lower()[:5]}"
        )
        return hashlib.md5(raw_string.encode()).hexdigest()

    @classmethod
    async def get_city_bounds(cls, city, country_code) -> Optional[List[float]]:
        """Look up a city's bounding box through Nominatim.

        Returns:
            ``[min_lat, max_lat, min_lon, max_lon]`` as floats, or ``None``
            when the request fails, the response is not HTTP 200, there is
            no match, or the match carries no usable bounding box.
        """
        url = "https://nominatim.openstreetmap.org/search"
        params = {"city": city, "country": country_code, "format": "json"}
        try:
            async with httpx.AsyncClient(
                headers={"User-Agent": "ServiceFinder-Scout/1.0"}
            ) as client:
                resp = await client.get(url, params=params)
                if resp.status_code != 200:
                    return None
                # Parse once; the original parsed the body twice.
                results = resp.json()
        except (httpx.HTTPError, ValueError) as exc:
            # A transport failure or a non-JSON body means "no bounds"
            # rather than an error that aborts the caller's whole cycle.
            logger.warning("Nominatim lookup failed for %s: %s", city, exc)
            return None

        if not isinstance(results, list) or not results:
            return None

        # Order per the original note: [min_lat, max_lat, min_lon, max_lon].
        bbox = results[0].get("boundingbox")
        if not bbox or len(bbox) != 4:
            return None
        try:
            return [float(x) for x in bbox]
        except (TypeError, ValueError):
            return None

    @classmethod
    async def run_grid_search(cls, db, task):
        """Scan the task's city cell by cell and stage every place found.

        Args:
            db: Session handed through to ``save_to_staging``.
            task: Object providing ``city``, ``country_code`` and
                ``keyword`` attributes.

        Returns nothing; it exits early when no bounding box is available.
        """
        bbox = await cls.get_city_bounds(task.city, task.country_code)
        if not bbox:
            return

        # Grid step in degrees. 0.015 deg of latitude is about 1.7 km; the
        # ground distance of 0.02 deg of longitude depends on latitude.
        lat_step = 0.015
        lon_step = 0.02

        curr_lat = bbox[0]
        while curr_lat < bbox[1]:
            curr_lon = bbox[2]
            while curr_lon < bbox[3]:
                logger.info(f"🛰️ Rács-cella pásztázása: {curr_lat}, {curr_lon} - Kulcsszó: {task.keyword}")
                places = await cls.get_google_places(curr_lat, curr_lon, task.keyword)

                for p in places:
                    # Extract the fields used for the staging record.
                    name = p.get('displayName', {}).get('text')
                    full_addr = p.get('formattedAddress', '')

                    # The formatted address stands in for the street part.
                    f_print = cls.generate_fingerprint(name, task.city, full_addr)

                    # NOTE(review): neighbouring cells use a 1500 m search
                    # radius, so one place can be returned by several cells
                    # and reach save_to_staging more than once per scan --
                    # confirm the repeated trust bump is intended.
                    await cls.save_to_staging(db, {
                        "external_id": p.get('id'),
                        "name": name,
                        "full_address": full_addr,
                        "phone": p.get('internationalPhoneNumber'),
                        "website": p.get('websiteUri'),
                        "fingerprint": f_print,
                        "city": task.city,
                        "source": "google",
                        "raw": p,
                        "trust": 30
                    })

                curr_lon += lon_step
                await asyncio.sleep(0.5)  # Throttle calls to protect the API quota.
            curr_lat += lat_step

    @classmethod
    async def get_google_places(cls, lat, lon, keyword):
        """Query Google Places (New) Nearby Search around one grid point.

        Args:
            lat: Latitude of the circle centre.
            lon: Longitude of the circle centre.
            keyword: Accepted for interface compatibility; it is not part
                of the request payload (see the note in the body).

        Returns:
            The ``places`` list from the response, or ``[]`` when no API key
            is configured, the request fails, or the status is not 200.
        """
        if not cls.GOOGLE_API_KEY:
            return []

        headers = {
            "Content-Type": "application/json",
            "X-Goog-Api-Key": cls.GOOGLE_API_KEY,
            "X-Goog-FieldMask": "places.displayName,places.id,places.internationalPhoneNumber,places.websiteUri,places.formattedAddress"
        }
        # NOTE(review): ``keyword`` is never sent; results are filtered only
        # by the fixed ``includedTypes`` below. Confirm whether keyword
        # filtering was intended.
        payload = {
            "includedTypes": ["car_repair", "motorcycle_repair"],
            "maxResultCount": 20,
            "locationRestriction": {
                "circle": {
                    "center": {"latitude": lat, "longitude": lon},
                    "radius": 1500.0  # 1.5 km radius so adjacent cells overlap.
                }
            }
        }
        try:
            async with httpx.AsyncClient() as client:
                resp = await client.post(
                    cls.PLACES_NEW_URL, json=payload, headers=headers
                )
                if resp.status_code != 200:
                    logger.warning(
                        "Google Places returned HTTP %s for cell %s, %s",
                        resp.status_code, lat, lon,
                    )
                    return []
                return resp.json().get("places", [])
        except (httpx.HTTPError, ValueError) as exc:
            # One failed cell must not abort the whole grid scan.
            logger.warning(
                "Google Places request failed for cell %s, %s: %s", lat, lon, exc
            )
            return []

    @classmethod
    async def save_to_staging(cls, db: AsyncSession, data: dict):
        """Insert a staging row unless its fingerprint is already stored.

        Args:
            db: Async session used for the lookup and the insert.
            data: Must contain ``fingerprint``, ``name``, ``source``,
                ``external_id``, ``city``, ``full_address``, ``phone`` and
                ``website``; ``raw`` and ``trust`` are optional.

        When a row with the same fingerprint exists, only its
        ``trust_score`` is raised by 5. The method flushes but never
        commits; committing is left to the caller.
        """
        # 1. Check whether this fingerprint is already staged.
        stmt = select(ServiceStaging).where(
            ServiceStaging.fingerprint == data['fingerprint']
        )
        existing = (await db.execute(stmt)).scalar_one_or_none()

        if existing:
            # Only the trust score is bumped; no other column is touched.
            existing.trust_score += 5
            return

        new_entry = ServiceStaging(
            name=data['name'],
            source=data['source'],
            # NOTE(review): str(None) stores the literal text "None".
            external_id=str(data['external_id']),
            fingerprint=data['fingerprint'],
            city=data['city'],
            full_address=data['full_address'],
            contact_phone=data['phone'],
            website=data['website'],
            raw_data=data.get('raw', {}),
            status="pending",
            trust_score=data.get('trust', 30)
        )
        db.add(new_entry)
        # Flush so later lookups in the same transaction can see the row.
        await db.flush()

    @classmethod
    async def run(cls):
        """Run the discovery loop forever, one cycle per hour.

        Each cycle opens a session, scans every active
        ``DiscoveryParameter`` and commits after each task. Any exception
        is logged with its traceback and the open transaction is rolled
        back; the loop then waits for the next cycle.
        """
        logger.info("🤖 Continental Scout v1.3.1 - Grid Engine INDUL...")
        while True:
            async with SessionLocal() as db:
                try:
                    await db.execute(text("SET search_path TO data, public"))
                    stmt = select(DiscoveryParameter).where(
                        DiscoveryParameter.is_active.is_(True)
                    )
                    tasks = (await db.execute(stmt)).scalars().all()

                    for task in tasks:
                        logger.info(f"🔎 Mélyfúrás indítása: {task.city} -> {task.keyword}")
                        await cls.run_grid_search(db, task)

                        task.last_run_at = datetime.now(timezone.utc)
                        # NOTE(review): if SessionLocal expires objects on
                        # commit, later tasks are reloaded on attribute
                        # access -- confirm the session factory settings.
                        await db.commit()
                except Exception as e:
                    # Top-level boundary: keep the traceback, then reset
                    # the transaction so the session closes cleanly.
                    logger.exception("💥 Hiba: %s", e)
                    await db.rollback()

            # Sleep outside the session block so no session is held open
            # while the robot is idle.
            await asyncio.sleep(3600)


if __name__ == "__main__":
    asyncio.run(ServiceHunter.run())