# Listing metadata (file-viewer residue): 161 lines, 6.6 KiB, Python
import asyncio
|
|
import httpx
|
|
import logging
|
|
import os
|
|
import hashlib
|
|
from datetime import datetime, timezone
|
|
from sqlalchemy.ext.asyncio import AsyncSession
|
|
from sqlalchemy import select, text, update
|
|
from app.db.session import SessionLocal
|
|
from app.models.service import ServiceStaging, DiscoveryParameter
|
|
|
|
# Logging: configure the root handler once for this worker process and create
# the named logger that every ServiceHunter method writes to.
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
logger = logging.getLogger("Robot-v1.3.1-ContinentalScout")
|
|
|
|
class ServiceHunter:
    """
    Robot v1.3.1: Continental Scout (Grid Search Edition).

    - Dynamic grid traversal so that dense areas are fully covered.
    - Fingerprint-based de-duplication.
    - Extended keyword handling.

    All methods are classmethods; the class is used as a namespace and is
    never instantiated in this file.
    """

    PLACES_NEW_URL = "https://places.googleapis.com/v1/places:searchNearby"
    GEOCODE_URL = "https://maps.googleapis.com/maps/api/geocode/json"
    # Read once at import time; when unset, no Google request is ever issued.
    GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")

    # Issued at the start of every unit of work: a rolled-back transaction
    # also rolls back a SET done inside it, so it has to be re-applied.
    _SEARCH_PATH_SQL = "SET search_path TO data, public"

    @classmethod
    def generate_fingerprint(cls, name: str, city: str, street: str) -> str:
        """Build the fingerprint used to filter out duplicate places.

        The fingerprint is the MD5 hex digest of the lower-cased string
        ``name|city|street[:5]``. MD5 serves only as a compact lookup key
        here, not as a security measure. Every argument is passed through
        ``str()``, so ``None`` contributes the text ``"none"``.

        Args:
            name: Display name of the place.
            city: City the search task belongs to.
            street: Address text; only its first five characters are used.

        Returns:
            The 32-character hexadecimal digest.
        """
        # NOTE(review): callers in this class pass the full formatted address
        # as `street`; if that address starts with the city name, the
        # five-character prefix adds little discrimination. The format is
        # left unchanged so previously stored fingerprints keep matching.
        raw_string = f"{str(name).lower()}|{str(city).lower()}|{str(street).lower()[:5]}"
        return hashlib.md5(raw_string.encode()).hexdigest()

    @staticmethod
    def _grid_axis(start, stop, step):
        """Yield ``start + i * step`` for i = 0, 1, ... while the value is below ``stop``.

        Each point is computed from its index rather than by repeated
        addition, so rounding error does not accumulate along the axis.

        Raises:
            ValueError: If ``step`` is not positive (the walk would never end).
        """
        if step <= 0:
            raise ValueError("step must be positive")
        index = 0
        value = start
        while value < stop:
            yield value
            index += 1
            value = start + index * step

    @classmethod
    async def get_city_bounds(cls, city, country_code):
        """Fetch the bounding box of a city from Nominatim.

        Args:
            city: City name, sent as the ``city`` query parameter.
            country_code: Sent as the ``country`` query parameter.

        Returns:
            ``[min_lat, max_lat, min_lon, max_lon]`` as floats, or ``None``
            when the request does not return HTTP 200, yields no result, or
            the first result carries no four-element ``boundingbox``.
        """
        url = "https://nominatim.openstreetmap.org/search"
        params = {"city": city, "country": country_code, "format": "json"}
        async with httpx.AsyncClient(headers={"User-Agent": "ServiceFinder-Scout/1.0"}) as client:
            resp = await client.get(url, params=params)
            if resp.status_code != 200:
                return None
            results = resp.json()  # parsed once and reused below
            if not results:
                return None
            bbox = results[0].get("boundingbox")  # [min_lat, max_lat, min_lon, max_lon]
            if not bbox or len(bbox) != 4:
                return None
            return [float(x) for x in bbox]

    @classmethod
    async def run_grid_search(cls, db, task):
        """Walk a grid over the task's city and stage every place found.

        Args:
            db: Async database session handed on to ``save_to_staging``.
            task: Object exposing ``city``, ``country_code`` and ``keyword``.

        Returns:
            ``None``. Returns early, doing nothing, when no Google API key is
            configured or when the city's bounding box cannot be resolved.
        """
        if not cls.GOOGLE_API_KEY:
            # Without a key every cell would return nothing; skip the
            # Nominatim lookup and the per-cell sleeps entirely.
            logger.warning("GOOGLE_API_KEY is not set; grid search skipped.")
            return

        bbox = await cls.get_city_bounds(task.city, task.country_code)
        if not bbox:
            return
        min_lat, max_lat, min_lon, max_lon = bbox

        # Grid spacing in degrees. 0.015 deg of latitude is roughly 1.7 km;
        # the ground distance covered by 0.02 deg of longitude shrinks with
        # latitude. Each cell is queried with a 1.5 km radius circle.
        lat_step = 0.015
        lon_step = 0.02

        city = task.city
        keyword = task.keyword

        # One HTTP client for the whole grid so connections are reused
        # instead of being re-established for every cell.
        async with httpx.AsyncClient() as client:
            for curr_lat in cls._grid_axis(min_lat, max_lat, lat_step):
                for curr_lon in cls._grid_axis(min_lon, max_lon, lon_step):
                    logger.info(f"🛰️ Rács-cella pásztázása: {curr_lat}, {curr_lon} - Kulcsszó: {keyword}")
                    places = await cls.get_google_places(curr_lat, curr_lon, keyword, client=client)

                    for p in places:
                        # Extract and tidy the fields we keep.
                        name = p.get('displayName', {}).get('text')
                        full_addr = p.get('formattedAddress', '')

                        # Fingerprint used for de-duplication.
                        f_print = cls.generate_fingerprint(name, city, full_addr)

                        await cls.save_to_staging(db, {
                            "external_id": p.get('id'),
                            "name": name,
                            "full_address": full_addr,
                            "phone": p.get('internationalPhoneNumber'),
                            "website": p.get('websiteUri'),
                            "fingerprint": f_print,
                            "city": city,
                            "source": "google",
                            "raw": p,
                            "trust": 30,
                        })

                    await asyncio.sleep(0.5)  # throttle to protect the API quota

    @classmethod
    async def get_google_places(cls, lat, lon, keyword, client=None):
        """Query the Google Places (New) Nearby Search for one grid point.

        Args:
            lat: Latitude of the circle centre.
            lon: Longitude of the circle centre.
            keyword: Accepted for interface compatibility. NOTE: it is not
                part of the request; results are filtered only by the fixed
                ``includedTypes`` list below.
            client: Optional ``httpx.AsyncClient`` to reuse. When omitted, a
                short-lived client is created for this single call.

        Returns:
            The list under the ``places`` key of the response, or ``[]`` when
            no API key is configured or the response is not HTTP 200.
        """
        if not cls.GOOGLE_API_KEY:
            return []
        headers = {
            "Content-Type": "application/json",
            "X-Goog-Api-Key": cls.GOOGLE_API_KEY,
            "X-Goog-FieldMask": "places.displayName,places.id,places.internationalPhoneNumber,places.websiteUri,places.formattedAddress"
        }
        payload = {
            "includedTypes": ["car_repair", "motorcycle_repair"],
            "maxResultCount": 20,
            "locationRestriction": {
                "circle": {
                    "center": {"latitude": lat, "longitude": lon},
                    "radius": 1500.0  # 1.5 km radius so neighbouring cells overlap
                }
            }
        }
        if client is not None:
            resp = await client.post(cls.PLACES_NEW_URL, json=payload, headers=headers)
        else:
            async with httpx.AsyncClient() as own_client:
                resp = await own_client.post(cls.PLACES_NEW_URL, json=payload, headers=headers)

        if resp.status_code != 200:
            # Surface quota/auth/validation failures instead of silently
            # treating them as "no results".
            logger.warning("Places API returned HTTP %s for cell %s, %s", resp.status_code, lat, lon)
            return []
        return resp.json().get("places", [])

    @classmethod
    async def save_to_staging(cls, db: "AsyncSession", data: dict):
        """Insert a place into staging unless its fingerprint already exists.

        When a row with the same fingerprint is found, its ``trust_score`` is
        raised by 5 and nothing else is changed. Otherwise a new
        ``ServiceStaging`` row with status ``"pending"`` is added and flushed.
        The caller owns the transaction; nothing is committed here.

        Args:
            db: Async database session.
            data: Dict with keys ``fingerprint``, ``name``, ``source``,
                ``external_id``, ``city``, ``full_address``, ``phone``,
                ``website`` and optionally ``raw`` and ``trust``.
        """
        # 1. Look for an existing row with this fingerprint. `first()` is
        #    used so pre-existing duplicate rows do not raise.
        stmt = (
            select(ServiceStaging)
            .where(ServiceStaging.fingerprint == data['fingerprint'])
            .limit(1)
        )
        existing = (await db.execute(stmt)).scalars().first()

        if existing is not None:
            # Only the trust score is bumped; a NULL score counts as 0.
            existing.trust_score = (existing.trust_score or 0) + 5
            return

        new_entry = ServiceStaging(
            name=data['name'],
            source=data['source'],
            external_id=str(data['external_id']),
            fingerprint=data['fingerprint'],
            city=data['city'],
            full_address=data['full_address'],
            contact_phone=data['phone'],
            website=data['website'],
            raw_data=data.get('raw', {}),
            status="pending",
            trust_score=data.get('trust', 30)
        )
        db.add(new_entry)
        # Flush so the row is visible to the next fingerprint lookup.
        await db.flush()

    @classmethod
    async def _run_task(cls, db, task):
        """Run one discovery task in its own transaction.

        A failure is logged with its traceback and rolled back, so the
        remaining tasks of the cycle still run.
        """
        try:
            await db.execute(text(cls._SEARCH_PATH_SQL))
            # A preceding commit/rollback may have expired this instance;
            # reload it explicitly, because AsyncSession cannot lazy-load
            # attributes on access.
            await db.refresh(task)

            logger.info(f"🔎 Mélyfúrás indítása: {task.city} -> {task.keyword}")
            await cls.run_grid_search(db, task)

            task.last_run_at = datetime.now(timezone.utc)
            await db.commit()
        except Exception as e:
            logger.exception(f"💥 Hiba: {e}")
            await db.rollback()

    @classmethod
    async def run(cls):
        """Main loop: run every active discovery task, then sleep for an hour.

        Never returns. Errors are logged and the loop carries on with the
        next task or the next cycle.
        """
        logger.info("🤖 Continental Scout v1.3.1 - Grid Engine INDUL...")
        while True:
            async with SessionLocal() as db:
                try:
                    await db.execute(text(cls._SEARCH_PATH_SQL))
                    stmt = select(DiscoveryParameter).where(DiscoveryParameter.is_active == True)  # noqa: E712
                    tasks = (await db.execute(stmt)).scalars().all()

                    for task in tasks:
                        await cls._run_task(db, task)

                except Exception as e:
                    logger.exception(f"💥 Hiba: {e}")
                    await db.rollback()

            # Sleep outside the session block so no connection is held open
            # while the robot is idle.
            await asyncio.sleep(3600)
|
|
|
|
# Script entry point: start the endless discovery loop when run directly.
if __name__ == "__main__":
    asyncio.run(ServiceHunter.run())