#!/usr/bin/env python3
from __future__ import annotations

import argparse
import asyncio
import os

from config import init_config
from scrape.auth import get_access_token
from scrape.build_snapshot import build_snapshot
from scrape.persist_api import (
    save_nav,
    upsert_product,
    log_product_result,
    capture_listing,
    save_subcategory_redirects,
)

# ------------------------ CLI ------------------------

def main() -> None:
    ap = argparse.ArgumentParser()
    ap.add_argument("--out", default="./snapshot",
                    help="(unused for JSON now; kept for compatibility)")
    ap.add_argument("--max-pages", type=int, default=999)
    ap.add_argument("--max-products", type=int, default=200000)
    ap.add_argument("--concurrency", type=int, default=16)
    # Credentials and the DB URL fall back to the environment.
    ap.add_argument("--user", default=os.getenv("SUMA_USER"))
    ap.add_argument("--pass", dest="password", default=os.getenv("SUMA_PASS"))
    ap.add_argument(
        "--db",
        dest="database_url",
        default=os.getenv(
            "DATABASE_URL",
            "postgresql+asyncpg://user:pass@localhost:5432/suma",
        ),
    )
    args = ap.parse_args()

    # Ensure we have an OAuth token before starting (triggers device flow if needed).
    get_access_token(require=True)

    async def _run() -> None:
        # Initialise config and build the snapshot inside a single event loop,
        # so that any loop-bound resources created by init_config (e.g.
        # connection pools) remain usable in build_snapshot. Two separate
        # asyncio.run() calls would give each coroutine its own loop.
        await init_config()
        await build_snapshot(
            args.concurrency,
            args.user,
            args.password,
            save_nav,
            capture_listing,
            upsert_product,
            log_product_result,
            save_subcategory_redirects,
            # save_link_reports
        )

    asyncio.run(_run())


if __name__ == "__main__":
    main()
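
# A minimal invocation sketch, assuming this file is saved as snapshot_cli.py
# (hypothetical name) and a Postgres instance is reachable; the credentials
# and URL below are placeholders, not real values:
#
#   export SUMA_USER=alice
#   export SUMA_PASS=secret
#   export DATABASE_URL=postgresql+asyncpg://alice:secret@localhost:5432/suma
#   python snapshot_cli.py --concurrency 8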