Monorepo: consolidate 7 repos into one
All checks were successful
Build and Deploy / build-and-deploy (push) Successful in 1m5s
All checks were successful
Build and Deploy / build-and-deploy (push) Successful in 1m5s
Combines shared, blog, market, cart, events, federation, and account into a single repository. Eliminates submodule sync, sibling model copying at build time, and per-app CI orchestration.

Changes:
- Remove per-app .git, .gitmodules, .gitea, and submodule shared/ dirs
- Remove stale sibling model copies from each app
- Update all 6 Dockerfiles for monorepo build context (root = .)
- Add build directives to docker-compose.yml
- Add single .gitea/workflows/ci.yml with change detection
- Add .dockerignore for monorepo build context
- Create __init__.py for federation and account (cross-app imports)
This commit is contained in:
104
market/scrape/build_snapshot/build_snapshot.py
Normal file
104
market/scrape/build_snapshot/build_snapshot.py
Normal file
@@ -0,0 +1,104 @@
|
||||
#!/usr/bin/env python3
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
from typing import Dict, Set
|
||||
|
||||
from ..http_client import configure_cookies
|
||||
from ..get_auth import login
|
||||
|
||||
from shared.config import config
|
||||
|
||||
from shared.utils import log
|
||||
|
||||
# DB: persistence helpers
|
||||
|
||||
from .tools import (
|
||||
_resolve_sub_redirects,
|
||||
valid_subs,
|
||||
candidate_subs,
|
||||
rewrite_nav,
|
||||
capture_product_slugs,
|
||||
fetch_and_upsert_products,
|
||||
)
|
||||
|
||||
from ..nav import nav_scrape
|
||||
|
||||
# ------------------------ core ------------------------
|
||||
async def build_snapshot(
    concurrency: int,
    user: str,
    password: str,
    save_nav,
    capture_listing,
    upsert_product,
    log_product_result,
    save_subcategory_redirects,
    save_link_reports=None,
) -> None:
    """Log in, scrape the nav, listings and products, and persist the results.

    Nothing is written by this function directly: every persistence step goes
    through one of the injected callbacks.

    Args:
        concurrency: Forwarded unchanged to ``fetch_and_upsert_products``.
        user: Username passed to ``login``.
        password: Password passed to ``login``.
        save_nav: Awaited with the nav structure after redirects have been
            applied to it.
        capture_listing: Forwarded to ``capture_product_slugs``.
        upsert_product: Forwarded to ``fetch_and_upsert_products``.
        log_product_result: Forwarded to ``fetch_and_upsert_products``.
        save_subcategory_redirects: Awaited with the merged redirect mapping
            (nav redirects first, then redirects resolved from product HTML;
            the latter win on key collisions).
        save_link_reports: Optional; forwarded to
            ``fetch_and_upsert_products`` (``None`` by default).
    """
    # NOTE: JSON snapshot files are no longer written; results go through the
    # injected persistence callbacks instead.

    # Make project importable. Only insert when the root is not already the
    # first entry, so repeated calls do not keep growing sys.path.
    import sys

    project_root = os.path.abspath(".")
    if not sys.path or sys.path[0] != project_root:
        sys.path.insert(0, project_root)

    cookies = await login(username=user, password=password)
    await configure_cookies(cookies)
    # Report which cookies were obtained, but never their values: they are
    # session credentials and must not end up in stdout or log files.
    for cookie_name in dict(cookies):
        print("logged in with", cookie_name)

    # 1) NAV
    log("Fetching nav…")
    nav = await nav_scrape()

    # Build valid subs per top from nav
    valid_subs_by_top: Dict[str, Set[str]] = valid_subs(nav)

    # Read the configuration once; both redirect passes use the same values.
    cfg = config()
    base_url = cfg["base_url"]
    allowed_top_values = cfg["categories"]["allow"].values()

    # Resolve redirects for all subs in nav first
    nav_sub_candidates = candidate_subs(nav)
    nav_redirects = await _resolve_sub_redirects(
        base_url=base_url,
        candidates=nav_sub_candidates,
        allowed_tops=set(allowed_top_values),
        valid_subs_by_top=valid_subs_by_top,
    )
    # Return value is not used; presumably rewrites ``nav`` in place
    # (TODO confirm against .tools.rewrite_nav).
    rewrite_nav(nav, nav_redirects)

    # DB: save nav
    await save_nav(nav)

    # 2) LISTINGS (collect product slugs)
    product_slugs: Set[str] = await capture_product_slugs(
        nav,
        capture_listing,
    )

    # Handed to the product pass and used as redirect candidates afterwards;
    # presumably filled in place by fetch_and_upsert_products (TODO confirm).
    unknown_sub_paths: Set[str] = set()

    # 3) PRODUCTS (fetch details)
    await fetch_and_upsert_products(
        upsert_product,
        log_product_result,
        save_link_reports,
        concurrency,
        product_slugs,
        valid_subs_by_top,
        unknown_sub_paths,
    )

    # Subcategory redirects from HTML
    log("Resolving subcategory redirects…")
    html_redirects = await _resolve_sub_redirects(
        base_url=base_url,
        candidates=unknown_sub_paths,
        allowed_tops=set(allowed_top_values),
        valid_subs_by_top=valid_subs_by_top,
    )

    # Merge both passes; HTML-derived entries override nav-derived ones.
    sub_redirects: Dict[str, str] = dict(nav_redirects)
    sub_redirects.update(html_redirects)

    # DB: persist redirects
    await save_subcategory_redirects(sub_redirects)

    log("Snapshot build complete (to Postgres).")
|
||||
|
||||
|
||||
Reference in New Issue
Block a user