From 98aee1f656949783bc66dd14d4c25a58770c31a8 Mon Sep 17 00:00:00 2001 From: giles Date: Thu, 26 Feb 2026 16:06:28 +0000 Subject: [PATCH] Add scrape_to_snapshot.py entry point with OAuth device flow login Restores the missing entry point script for API-mode scraping. Calls get_access_token() before starting to trigger device flow login if no token is saved. Co-Authored-By: Claude Opus 4.6 --- market/scrape_to_snapshot.py | 56 ++++++++++++++++++++++++++++++++++++ 1 file changed, 56 insertions(+) create mode 100644 market/scrape_to_snapshot.py diff --git a/market/scrape_to_snapshot.py b/market/scrape_to_snapshot.py new file mode 100644 index 0000000..32cb528 --- /dev/null +++ b/market/scrape_to_snapshot.py @@ -0,0 +1,56 @@ +#!/usr/bin/env python3 +from __future__ import annotations + +import argparse +import asyncio +import os + +from config import init_config + +from scrape.auth import get_access_token +from scrape.build_snapshot import build_snapshot + +from scrape.persist_api import ( + save_nav, + upsert_product, + log_product_result, + capture_listing, + save_subcategory_redirects, +) + + + +# ------------------------ CLI ------------------------ +def main() -> None: + ap = argparse.ArgumentParser() + ap.add_argument("--out", default="./snapshot", help="(unused for JSON now; kept for compatibility)") + ap.add_argument("--max-pages", type=int, default=999) + ap.add_argument("--max-products", type=int, default=200000) + ap.add_argument("--concurrency", type=int, default=16) + ap.add_argument("--user", default=os.getenv("SUMA_USER")) + ap.add_argument("--pass", dest="password", default=os.getenv("SUMA_PASS")) + ap.add_argument("--db", dest="database_url", default=os.getenv("DATABASE_URL", "postgresql+asyncpg://user:pass@localhost:5432/suma")) + + args = ap.parse_args() + + # Ensure we have an OAuth token before starting (triggers device flow if needed) + get_access_token(require=True) + + asyncio.run(init_config()) + asyncio.run( + build_snapshot( + args.concurrency, + args.user, + args.password, + save_nav, + capture_listing, + upsert_product, + log_product_result, + save_subcategory_redirects, + #save_link_reports + ) + ) + + +if __name__ == "__main__": + main()