Add scrape_to_snapshot.py entry point with OAuth device flow login
Restores the missing entry point script for API-mode scraping. Calls get_access_token() before starting, which triggers the device flow login if no token is saved.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
56
market/scrape_to_snapshot.py
Normal file
56
market/scrape_to_snapshot.py
Normal file
@@ -0,0 +1,56 @@
|
||||
#!/usr/bin/env python3
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import asyncio
|
||||
import os
|
||||
|
||||
from config import init_config
|
||||
|
||||
from scrape.auth import get_access_token
|
||||
from scrape.build_snapshot import build_snapshot
|
||||
|
||||
from scrape.persist_api import (
|
||||
save_nav,
|
||||
upsert_product,
|
||||
log_product_result,
|
||||
capture_listing,
|
||||
save_subcategory_redirects,
|
||||
)
|
||||
|
||||
|
||||
|
||||
# ------------------------ CLI ------------------------
|
||||
def main() -> None:
    """Parse CLI options, make sure an OAuth token exists, then run the scrape.

    Option defaults for ``--user``, ``--pass`` and ``--db`` are read from the
    ``SUMA_USER``, ``SUMA_PASS`` and ``DATABASE_URL`` environment variables.
    ``--out``, ``--max-pages``, ``--max-products`` and ``--db`` are accepted
    but not consumed by this function; only ``--concurrency``, ``--user`` and
    ``--pass`` are forwarded to ``build_snapshot``.
    """
    ap = argparse.ArgumentParser()
    ap.add_argument("--out", default="./snapshot", help="(unused for JSON now; kept for compatibility)")
    ap.add_argument("--max-pages", type=int, default=999)
    ap.add_argument("--max-products", type=int, default=200000)
    ap.add_argument("--concurrency", type=int, default=16)
    ap.add_argument("--user", default=os.getenv("SUMA_USER"))
    ap.add_argument("--pass", dest="password", default=os.getenv("SUMA_PASS"))
    ap.add_argument(
        "--db",
        dest="database_url",
        default=os.getenv("DATABASE_URL", "postgresql+asyncpg://user:pass@localhost:5432/suma"),
    )

    args = ap.parse_args()

    # Ensure we have an OAuth token before starting (triggers device flow if needed)
    get_access_token(require=True)

    async def _run() -> None:
        """Initialise config and build the snapshot on one event loop."""
        await init_config()
        await build_snapshot(
            args.concurrency,
            args.user,
            args.password,
            save_nav,
            capture_listing,
            upsert_product,
            log_product_result,
            save_subcategory_redirects,
            # save_link_reports  (left disabled, as in the original call)
        )

    # asyncio.run() creates a fresh event loop per call and closes it on
    # return. Running both coroutines under a single call keeps any
    # loop-bound state that init_config() may create usable by
    # build_snapshot().
    asyncio.run(_run())
|
||||
|
||||
|
||||
# Run the CLI only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user