Add scrape_to_snapshot.py entry point with OAuth device flow login
All checks were successful
Build and Deploy / build-and-deploy (push) Successful in 2m13s

Restores the missing entry point script for API-mode scraping.
Calls get_access_token() before starting, so the device-flow login
is triggered when no token has been saved yet.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-02-26 16:06:28 +00:00
parent 81112c716b
commit 98aee1f656

View File

@@ -0,0 +1,56 @@
#!/usr/bin/env python3
from __future__ import annotations
import argparse
import asyncio
import os
from config import init_config
from scrape.auth import get_access_token
from scrape.build_snapshot import build_snapshot
from scrape.persist_api import (
save_nav,
upsert_product,
log_product_result,
capture_listing,
save_subcategory_redirects,
)
# ------------------------ CLI ------------------------
async def _run(args: argparse.Namespace) -> None:
    """Initialise config and build the snapshot inside a single event loop.

    Running both steps under one ``asyncio.run()`` matters: the original
    code called ``asyncio.run(init_config())`` and then
    ``asyncio.run(build_snapshot(...))`` separately, so anything
    ``init_config()`` bound to its event loop (connection pools, client
    sessions — TODO confirm against scrape/config internals) would be
    attached to a loop that was already closed when the scrape started.
    """
    await init_config()
    await build_snapshot(
        args.concurrency,
        args.user,
        args.password,
        save_nav,
        capture_listing,
        upsert_product,
        log_product_result,
        save_subcategory_redirects,
        # TODO: re-enable save_link_reports once that callback is restored
    )


def main() -> None:
    """CLI entry point: parse arguments, ensure an OAuth token, run the scrape."""
    ap = argparse.ArgumentParser()
    ap.add_argument("--out", default="./snapshot", help="(unused for JSON now; kept for compatibility)")
    ap.add_argument("--max-pages", type=int, default=999)
    ap.add_argument("--max-products", type=int, default=200000)
    ap.add_argument("--concurrency", type=int, default=16)
    # Credentials fall back to the environment so CI can run non-interactively.
    ap.add_argument("--user", default=os.getenv("SUMA_USER"))
    ap.add_argument("--pass", dest="password", default=os.getenv("SUMA_PASS"))
    ap.add_argument(
        "--db",
        dest="database_url",
        default=os.getenv("DATABASE_URL", "postgresql+asyncpg://user:pass@localhost:5432/suma"),
    )
    args = ap.parse_args()
    # Ensure we have an OAuth token before starting (triggers device flow if needed)
    get_access_token(require=True)
    # One asyncio.run() so init_config() and build_snapshot() share a loop.
    asyncio.run(_run(args))


if __name__ == "__main__":
    main()