#!/usr/bin/env python3
"""Snapshot builder: scrape the site nav, listings and products in one pass.

All persistence is delegated to callbacks supplied by the caller; this module
only orchestrates the order of the scraping steps.
"""
from __future__ import annotations

import os
import sys
from typing import Any, Awaitable, Callable, Dict, Set

from ..http_client import configure_cookies
from ..get_auth import login

from config import config
from utils import log

# Pipeline helpers (redirect resolution, nav rewriting, product capture)
from .tools import (
    _resolve_sub_redirects,
    valid_subs,
    candidate_subs,
    rewrite_nav,
    capture_product_slugs,
    fetch_and_upsert_products,
)

from ..nav import nav_scrape


# ------------------------ core ------------------------
async def build_snapshot(
    concurrency: int,
    user: str,
    password: str,
    save_nav: Callable[..., Awaitable[Any]],
    capture_listing,
    upsert_product,
    log_product_result,
    save_subcategory_redirects: Callable[..., Awaitable[Any]],
    save_link_reports=None,
) -> None:
    """Log in, scrape nav / listings / products, and persist via callbacks.

    Steps, in order:

    1. Log in and install the session cookies on the HTTP client.
    2. Scrape the nav, resolve redirects for its subcategories, rewrite the
       nav with those redirects, and hand it to ``save_nav``.
    3. Collect product slugs from the listings.
    4. Fetch and upsert the products.
    5. Resolve redirects for the sub paths gathered in ``unknown_sub_paths``
       and hand the merged redirect map to ``save_subcategory_redirects``.

    Args:
        concurrency: Forwarded to ``fetch_and_upsert_products``
            (presumably the parallel-fetch limit; confirm in ``.tools``).
        user: Username passed to ``login``.
        password: Password passed to ``login``.
        save_nav: Awaited with the rewritten nav structure.
        capture_listing: Forwarded to ``capture_product_slugs``.
        upsert_product: Forwarded to ``fetch_and_upsert_products``.
        log_product_result: Forwarded to ``fetch_and_upsert_products``.
        save_subcategory_redirects: Awaited with the merged redirect mapping;
            on a key clash the HTML-derived redirect overrides the nav one.
        save_link_reports: Optional; forwarded unchanged (``None`` by
            default) to ``fetch_and_upsert_products``.
    """
    # Make the project importable from the current working directory. Only
    # insert when it is not already first, so repeated calls do not keep
    # growing sys.path with duplicate entries.
    project_root = os.path.abspath(".")
    if sys.path[:1] != [project_root]:
        sys.path.insert(0, project_root)

    cookies = await login(username=user, password=password)
    await configure_cookies(cookies)
    # Report cookie names only: the values are session secrets and must not
    # end up in console or CI output.
    for cookie_name in dict(cookies):
        log(f"logged in with cookie {cookie_name}")

    # 1) NAV
    log("Fetching nav…")
    nav = await nav_scrape()

    # Valid subcategories per top-level category, derived from the nav.
    valid_subs_by_top: Dict[str, Set[str]] = valid_subs(nav)
    nav_sub_candidates = candidate_subs(nav)

    # Read the configuration once so both redirect passes use the same values.
    settings = config()
    base_url = settings["base_url"]
    allowed_top_values = tuple(settings["categories"]["allow"].values())

    async def resolve_redirects(candidates):
        # A fresh set per call, so the callee can never share state
        # between the two passes through this argument.
        return await _resolve_sub_redirects(
            base_url=base_url,
            candidates=candidates,
            allowed_tops=set(allowed_top_values),
            valid_subs_by_top=valid_subs_by_top,
        )

    # Resolve redirects for every sub in the nav before anything is saved.
    nav_redirects = await resolve_redirects(nav_sub_candidates)
    rewrite_nav(nav, nav_redirects)

    # DB: save nav
    await save_nav(nav)

    # 2) LISTINGS (collect product slugs)
    product_slugs: Set[str] = await capture_product_slugs(nav, capture_listing)

    # Handed to the product step and used afterwards as redirect candidates;
    # presumably filled in by fetch_and_upsert_products (confirm in .tools).
    unknown_sub_paths: Set[str] = set()

    # 3) PRODUCTS (fetch details)
    await fetch_and_upsert_products(
        upsert_product,
        log_product_result,
        save_link_reports,
        concurrency,
        product_slugs,
        valid_subs_by_top,
        unknown_sub_paths,
    )

    # 4) Subcategory redirects from HTML
    log("Resolving subcategory redirects…")
    html_redirects = await resolve_redirects(unknown_sub_paths)

    # Nav redirects first; HTML-derived ones win on a key clash.
    sub_redirects: Dict[str, str] = dict(nav_redirects)
    sub_redirects.update(html_redirects)

    # DB: persist redirects
    await save_subcategory_redirects(sub_redirects)

    log("Snapshot build complete (to Postgres).")