Cleanup: MB 2.0 Gap Analysis előtti állapot (adatok kizárva)
This commit is contained in:
@@ -2,159 +2,159 @@ import asyncio
|
||||
import httpx
|
||||
import logging
|
||||
import os
|
||||
import hashlib
|
||||
from datetime import datetime, timezone
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
from sqlalchemy import select, text
|
||||
from sqlalchemy import select, text, update
|
||||
from app.db.session import SessionLocal
|
||||
|
||||
# Modellek - Az új v1.3 struktúra
|
||||
from app.models.service import ServiceStaging, DiscoveryParameter
|
||||
|
||||
# Naplózás
|
||||
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
|
||||
logger = logging.getLogger("Robot-v1.3-ContinentalScout")
|
||||
logger = logging.getLogger("Robot-v1.3.1-ContinentalScout")
|
||||
|
||||
class ServiceHunter:
|
||||
"""
|
||||
Robot v1.3.0: Continental Scout.
|
||||
EU-szintű felderítő motor, Discovery tábla alapú vezérléssel.
|
||||
Robot v1.3.1: Continental Scout (Grid Search Edition)
|
||||
- Dinamikus rácsbejárás a sűrű területek lefedésére.
|
||||
- Ujjlenyomat-alapú deduplikáció.
|
||||
- Bővített kulcsszókezelés.
|
||||
"""
|
||||
OVERPASS_URL = "http://overpass-api.de/api/interpreter"
|
||||
PLACES_NEW_URL = "https://places.googleapis.com/v1/places:searchNearby"
|
||||
GEOCODE_URL = "https://maps.googleapis.com/maps/api/geocode/json"
|
||||
GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")
|
||||
|
||||
@classmethod
|
||||
async def get_coordinates(cls, city, country_code):
|
||||
"""Város központjának lekérése a keresés indításához."""
|
||||
params = {"address": f"{city}, {country_code}", "key": cls.GOOGLE_API_KEY}
|
||||
async with httpx.AsyncClient() as client:
|
||||
resp = await client.get(cls.GEOCODE_URL, params=params)
|
||||
if resp.status_code == 200:
|
||||
results = resp.json().get("results")
|
||||
if results:
|
||||
loc = results[0]["geometry"]["location"]
|
||||
return loc["lat"], loc["lng"]
|
||||
return None, None
|
||||
def generate_fingerprint(cls, name: str, city: str, street: str) -> str:
|
||||
"""Egyedi ujjlenyomat készítése a duplikációk kiszűrésére."""
|
||||
raw_string = f"{str(name).lower()}|{str(city).lower()}|{str(street).lower()[:5]}"
|
||||
return hashlib.md5(raw_string.encode()).hexdigest()
|
||||
|
||||
@classmethod
|
||||
async def get_city_bounds(cls, city, country_code):
|
||||
"""Város befoglaló téglalapjának (Bounding Box) lekérése Nominatim-al."""
|
||||
url = "https://nominatim.openstreetmap.org/search"
|
||||
params = {"city": city, "country": country_code, "format": "json"}
|
||||
async with httpx.AsyncClient(headers={"User-Agent": "ServiceFinder-Scout/1.0"}) as client:
|
||||
resp = await client.get(url, params=params)
|
||||
if resp.status_code == 200 and resp.json():
|
||||
bbox = resp.json()[0].get("boundingbox") # [min_lat, max_lat, min_lon, max_lon]
|
||||
return [float(x) for x in bbox]
|
||||
return None
|
||||
|
||||
@classmethod
|
||||
async def run_grid_search(cls, db, task):
|
||||
"""Rács-alapú bejárás a városon belül."""
|
||||
bbox = await cls.get_city_bounds(task.city, task.country_code)
|
||||
if not bbox: return
|
||||
|
||||
# 1km-es lépések generálása (kb. 0.01 fok)
|
||||
lat_step = 0.015
|
||||
lon_step = 0.02
|
||||
|
||||
curr_lat = bbox[0]
|
||||
while curr_lat < bbox[1]:
|
||||
curr_lon = bbox[2]
|
||||
while curr_lon < bbox[3]:
|
||||
logger.info(f"🛰️ Rács-cella pásztázása: {curr_lat}, {curr_lon} - Kulcsszó: {task.keyword}")
|
||||
places = await cls.get_google_places(curr_lat, curr_lon, task.keyword)
|
||||
|
||||
for p in places:
|
||||
# Adatok kinyerése és tisztítása
|
||||
name = p.get('displayName', {}).get('text')
|
||||
full_addr = p.get('formattedAddress', '')
|
||||
|
||||
# Ujjlenyomat generálás
|
||||
f_print = cls.generate_fingerprint(name, task.city, full_addr)
|
||||
|
||||
await cls.save_to_staging(db, {
|
||||
"external_id": p.get('id'),
|
||||
"name": name,
|
||||
"full_address": full_addr,
|
||||
"phone": p.get('internationalPhoneNumber'),
|
||||
"website": p.get('websiteUri'),
|
||||
"fingerprint": f_print,
|
||||
"city": task.city,
|
||||
"source": "google",
|
||||
"raw": p,
|
||||
"trust": 30
|
||||
})
|
||||
curr_lon += lon_step
|
||||
await asyncio.sleep(0.5) # API védelem
|
||||
curr_lat += lat_step
|
||||
|
||||
@classmethod
|
||||
async def get_google_places(cls, lat, lon, keyword):
|
||||
"""Google Places New API - Javított, 400-as hiba elleni védelemmel."""
|
||||
"""Google Places New API hívás rács-pontra."""
|
||||
if not cls.GOOGLE_API_KEY: return []
|
||||
|
||||
headers = {
|
||||
"Content-Type": "application/json",
|
||||
"X-Goog-Api-Key": cls.GOOGLE_API_KEY,
|
||||
"X-Goog-FieldMask": "places.displayName,places.id,places.types,places.internationalPhoneNumber,places.websiteUri,places.formattedAddress"
|
||||
"X-Goog-FieldMask": "places.displayName,places.id,places.internationalPhoneNumber,places.websiteUri,places.formattedAddress"
|
||||
}
|
||||
|
||||
# A 'keyword' a TextQuery-hez kellene, a SearchNearby-nél típusokat (includedTypes) használunk.
|
||||
# EU szintű trükk: Ha nincs pontos típus, a 'car_repair' az alapértelmezett.
|
||||
payload = {
|
||||
"includedTypes": ["car_repair", "gas_station", "car_wash", "motorcycle_repair"],
|
||||
"includedTypes": ["car_repair", "motorcycle_repair"],
|
||||
"maxResultCount": 20,
|
||||
"locationRestriction": {
|
||||
"circle": {
|
||||
"center": {"latitude": lat, "longitude": lon},
|
||||
"radius": 5000.0 # 5km körzet
|
||||
"radius": 1500.0 # 1.5km sugarú kör a fedés érdekében
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
async with httpx.AsyncClient() as client:
|
||||
resp = await client.post(cls.PLACES_NEW_URL, json=payload, headers=headers)
|
||||
if resp.status_code == 200:
|
||||
return resp.json().get("places", [])
|
||||
else:
|
||||
logger.error(f"❌ Google API hiba ({resp.status_code}): {resp.text}")
|
||||
return []
|
||||
return resp.json().get("places", []) if resp.status_code == 200 else []
|
||||
|
||||
@classmethod
|
||||
async def save_to_staging(cls, db: AsyncSession, data: dict):
|
||||
"""Mentés a Staging táblába 9-mezős bontással."""
|
||||
stmt = select(ServiceStaging).where(ServiceStaging.external_id == str(data['external_id']))
|
||||
if (await db.execute(stmt)).scalar_one_or_none(): return
|
||||
"""Mentés ujjlenyomat ellenőrzéssel."""
|
||||
# 1. Megnézzük, létezik-e már ez az ujjlenyomat
|
||||
stmt = select(ServiceStaging).where(ServiceStaging.fingerprint == data['fingerprint'])
|
||||
existing = (await db.execute(stmt)).scalar_one_or_none()
|
||||
|
||||
if existing:
|
||||
# Csak a bizalmi pontot növeljük és az utolsó észlelést frissítjük
|
||||
existing.trust_score += 5
|
||||
return
|
||||
|
||||
new_entry = ServiceStaging(
|
||||
name=data['name'],
|
||||
source=data['source'],
|
||||
external_id=str(data['external_id']),
|
||||
# Itt történik a 9-mezős bontás (ha érkezik adat)
|
||||
postal_code=data.get('zip'),
|
||||
city=data.get('city'),
|
||||
street_name=data.get('street'),
|
||||
street_type=data.get('street_type', 'utca'),
|
||||
house_number=data.get('number'),
|
||||
full_address=data.get('full_address'),
|
||||
contact_phone=data.get('phone'),
|
||||
website=data.get('website'),
|
||||
fingerprint=data['fingerprint'],
|
||||
city=data['city'],
|
||||
full_address=data['full_address'],
|
||||
contact_phone=data['phone'],
|
||||
website=data['website'],
|
||||
raw_data=data.get('raw', {}),
|
||||
status="pending",
|
||||
trust_score=data.get('trust', 10)
|
||||
trust_score=data.get('trust', 30)
|
||||
)
|
||||
db.add(new_entry)
|
||||
await db.flush()
|
||||
|
||||
@classmethod
|
||||
async def run(cls):
|
||||
logger.info("🤖 Robot v1.3.0: Continental Scout elindult...")
|
||||
|
||||
logger.info("🤖 Continental Scout v1.3.1 - Grid Engine INDUL...")
|
||||
while True:
|
||||
async with SessionLocal() as db:
|
||||
try:
|
||||
await db.execute(text("SET search_path TO data, public"))
|
||||
# 1. Paraméterek lekérése a táblából
|
||||
stmt = select(DiscoveryParameter).where(DiscoveryParameter.is_active == True)
|
||||
tasks = (await db.execute(stmt)).scalars().all()
|
||||
|
||||
for task in tasks:
|
||||
logger.info(f"🔎 Felderítés: {task.city} ({task.country_code}) -> {task.keyword}")
|
||||
logger.info(f"🔎 Mélyfúrás indítása: {task.city} -> {task.keyword}")
|
||||
await cls.run_grid_search(db, task)
|
||||
|
||||
# Koordináták beszerzése a kereséshez
|
||||
lat, lon = await cls.get_coordinates(task.city, task.country_code)
|
||||
if not lat: continue
|
||||
|
||||
# --- GOOGLE FÁZIS ---
|
||||
google_places = await cls.get_google_places(lat, lon, task.keyword)
|
||||
for p in google_places:
|
||||
await cls.save_to_staging(db, {
|
||||
"external_id": p.get('id'),
|
||||
"name": p.get('displayName', {}).get('text'),
|
||||
"full_address": p.get('formattedAddress'),
|
||||
"phone": p.get('internationalPhoneNumber'),
|
||||
"website": p.get('websiteUri'),
|
||||
"source": "google",
|
||||
"raw": p,
|
||||
"trust": 30
|
||||
})
|
||||
|
||||
# --- OSM FÁZIS (EU kompatibilis lekérdezés) ---
|
||||
osm_query = f"""[out:json][timeout:60];
|
||||
(nwr["amenity"~"car_repair|fuel"](around:5000, {lat}, {lon}););
|
||||
out center;"""
|
||||
async with httpx.AsyncClient() as client:
|
||||
resp = await client.post(cls.OVERPASS_URL, data={"data": osm_query})
|
||||
if resp.status_code == 200:
|
||||
for el in resp.json().get("elements", []):
|
||||
t = el.get("tags", {})
|
||||
await cls.save_to_staging(db, {
|
||||
"external_id": f"osm_{el['id']}",
|
||||
"name": t.get('name', 'Ismeretlen szerviz'),
|
||||
"city": t.get('addr:city', task.city),
|
||||
"zip": t.get('addr:postcode'),
|
||||
"street": t.get('addr:street'),
|
||||
"number": t.get('addr:housenumber'),
|
||||
"source": "osm",
|
||||
"raw": el,
|
||||
"trust": 15
|
||||
})
|
||||
|
||||
task.last_run_at = datetime.now(timezone.utc)
|
||||
await db.commit()
|
||||
logger.info(f"✅ {task.city} felderítve.")
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"💥 Kritikus hiba a ciklusban: {e}")
|
||||
logger.error(f"💥 Hiba: {e}")
|
||||
await db.rollback()
|
||||
|
||||
logger.info("😴 Minden aktív feladat kész. Alvás 1 órán át...")
|
||||
await asyncio.sleep(3600)
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
||||
Reference in New Issue
Block a user