All checks were successful
Build and Deploy / build-and-deploy (push) Successful in 2m13s
Restores the missing entry point script for API-mode scraping. Calls get_access_token() before starting to trigger device flow login if no token is saved. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
57 lines
1.6 KiB
Python
57 lines
1.6 KiB
Python
#!/usr/bin/env python3
|
|
from __future__ import annotations
|
|
|
|
import argparse
|
|
import asyncio
|
|
import os
|
|
|
|
from config import init_config
|
|
|
|
from scrape.auth import get_access_token
|
|
from scrape.build_snapshot import build_snapshot
|
|
|
|
from scrape.persist_api import (
|
|
save_nav,
|
|
upsert_product,
|
|
log_product_result,
|
|
capture_listing,
|
|
save_subcategory_redirects,
|
|
)
|
|
|
|
|
|
|
|
# ------------------------ CLI ------------------------
|
|
def main() -> None:
    """Parse command-line options, ensure an OAuth token, and run the scrape.

    Steps, in order:
      1. Build the argument parser and parse ``sys.argv``.
      2. Call ``get_access_token(require=True)`` so a token is available
         before any async work begins.
      3. Run ``init_config()`` followed by ``build_snapshot(...)`` inside a
         single event loop.

    Only ``--concurrency``, ``--user`` and ``--pass`` are forwarded to
    ``build_snapshot``. ``--out``, ``--max-pages``, ``--max-products`` and
    ``--db`` are still accepted on the command line but are not read by this
    function.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--out",
        default="./snapshot",
        help="(unused for JSON now; kept for compatibility)",
    )
    parser.add_argument("--max-pages", type=int, default=999)
    parser.add_argument("--max-products", type=int, default=200000)
    parser.add_argument("--concurrency", type=int, default=16)
    # Credentials fall back to the environment when not given on the CLI.
    parser.add_argument("--user", default=os.getenv("SUMA_USER"))
    parser.add_argument("--pass", dest="password", default=os.getenv("SUMA_PASS"))
    parser.add_argument(
        "--db",
        dest="database_url",
        default=os.getenv(
            "DATABASE_URL",
            "postgresql+asyncpg://user:pass@localhost:5432/suma",
        ),
    )

    args = parser.parse_args()

    # Ensure we have an OAuth token before starting (triggers device flow if needed)
    get_access_token(require=True)

    async def _run() -> None:
        # Both coroutines share one event loop: asyncio.run() creates a fresh
        # loop and closes it on return, so running init_config() in its own
        # asyncio.run() would leave any loop-bound state it created attached
        # to a closed loop by the time build_snapshot() executes.
        await init_config()
        await build_snapshot(
            args.concurrency,
            args.user,
            args.password,
            save_nav,
            capture_listing,
            upsert_product,
            log_product_result,
            save_subcategory_redirects,
            # save_link_reports is deliberately not passed (disabled upstream).
        )

    asyncio.run(_run())
|
|
|
|
|
|
# Script entry point: run the CLI only when executed directly, not on import.
if __name__ == "__main__":
    main()
|