feat: initialize market app with browsing, product, and scraping code

Split from coop monolith. Includes:
- Market/browse/product blueprints
- Product sync API
- Suma scraping pipeline
- Templates for market, browse, and product views
- Dockerfile and CI workflow for independent deployment
Author: giles
Date: 2026-02-09 23:16:34 +00:00
Commit: 6271a715a1
142 changed files with 8517 additions and 0 deletions

.gitea/workflows/ci.yml (new file, 63 lines)

@@ -0,0 +1,63 @@
name: Build and Deploy
on:
push:
branches: [main]
env:
REGISTRY: registry.rose-ash.com:5000
IMAGE: market
jobs:
build-and-deploy:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- name: Install tools
run: |
apt-get update && apt-get install -y --no-install-recommends openssh-client
- name: Set up SSH
env:
SSH_KEY: ${{ secrets.DEPLOY_SSH_KEY }}
DEPLOY_HOST: ${{ secrets.DEPLOY_HOST }}
run: |
mkdir -p ~/.ssh
echo "$SSH_KEY" > ~/.ssh/id_rsa
chmod 600 ~/.ssh/id_rsa
ssh-keyscan -H "$DEPLOY_HOST" >> ~/.ssh/known_hosts 2>/dev/null || true
- name: Pull latest code on server
env:
DEPLOY_HOST: ${{ secrets.DEPLOY_HOST }}
run: |
ssh "root@$DEPLOY_HOST" "
cd /root/market
git fetch origin main
git reset --hard origin/main
"
- name: Build and push image
env:
DEPLOY_HOST: ${{ secrets.DEPLOY_HOST }}
run: |
ssh "root@$DEPLOY_HOST" "
cd /root/market
docker build --build-arg CACHEBUST=\$(date +%s) -t ${{ env.REGISTRY }}/${{ env.IMAGE }}:latest -t ${{ env.REGISTRY }}/${{ env.IMAGE }}:${{ github.sha }} .
docker push ${{ env.REGISTRY }}/${{ env.IMAGE }}:latest
docker push ${{ env.REGISTRY }}/${{ env.IMAGE }}:${{ github.sha }}
"
- name: Deploy stack
env:
DEPLOY_HOST: ${{ secrets.DEPLOY_HOST }}
run: |
ssh "root@$DEPLOY_HOST" "
cd /root/market
source .env
docker stack deploy -c docker-compose.yml market
echo 'Waiting for services to update...'
sleep 10
docker stack services market
"

.gitignore (new file, vendored, 12 lines)

@@ -0,0 +1,12 @@
__pycache__/
*.pyc
*.pyo
.env
node_modules/
_snapshot/
_debug/
*.egg-info/
dist/
build/
.venv/
venv/

Dockerfile (new file, 33 lines)

@@ -0,0 +1,33 @@
# syntax=docker/dockerfile:1
# ---------- Python application ----------
FROM python:3.11-slim AS base
ENV PYTHONDONTWRITEBYTECODE=1 \
PYTHONUNBUFFERED=1 \
PIP_NO_CACHE_DIR=1 \
APP_PORT=8000 \
APP_MODULE=app:app
WORKDIR /app
# Install system deps + psql client
RUN apt-get update && apt-get install -y --no-install-recommends \
ca-certificates \
postgresql-client \
&& rm -rf /var/lib/apt/lists/*
COPY requirements.txt ./requirements.txt
RUN pip install -r requirements.txt
COPY . .
# ---------- Runtime setup ----------
COPY entrypoint.sh /usr/local/bin/entrypoint.sh
RUN chmod +x /usr/local/bin/entrypoint.sh
RUN useradd -m -u 10001 appuser && chown -R appuser:appuser /app
USER appuser
EXPOSE ${APP_PORT}
ENTRYPOINT ["/usr/local/bin/entrypoint.sh"]

README.md (new file, 67 lines)

@@ -0,0 +1,67 @@
# Market App
Product browsing and marketplace application for the Rose Ash cooperative.
## Overview
The Market app is one of three microservices split from the original coop monolith:
- **coop** (:8000) - Blog, calendar, auth, settings
- **market** (:8001) - Product browsing, categories, product detail
- **cart** (:8002) - Shopping cart, orders, payments
## Architecture
- **Framework:** Quart (async Flask)
- **Database:** PostgreSQL 16 with SQLAlchemy 2.0 (async)
- **Cache:** Redis (tag-based page cache)
- **Frontend:** HTMX + Jinja2 + Tailwind CSS
- **Data:** Products scraped from Suma Wholesale
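The tag-based page cache lives in `suma_browser.app.redis_cacher` (not shown in this commit): each cached page is registered under one or more tags, so a whole group of pages can be invalidated at once (e.g. every `browse` page after a product sync). A minimal in-memory sketch of the idea — the class and method names here are illustrative, not the real API:

```python
class TagCache:
    """Toy tag-based cache: pages stored under a key and indexed by tag,
    so clearing a tag drops every page registered under it."""

    def __init__(self):
        self._pages = {}   # key -> rendered page
        self._tags = {}    # tag -> set of keys carrying that tag

    def set(self, key, html, tags):
        self._pages[key] = html
        for tag in tags:
            self._tags.setdefault(tag, set()).add(key)

    def get(self, key):
        return self._pages.get(key)

    def clear_tag(self, tag):
        # invalidate every page that carries this tag
        for key in self._tags.pop(tag, set()):
            self._pages.pop(key, None)

cache = TagCache()
cache.set("/all/?page=1", "<html>...</html>", tags=["browse"])
cache.set("/fruit/", "<html>...</html>", tags=["browse"])
cache.clear_tag("browse")  # e.g. after a product sync
```

The real implementation stores pages in Redis and exposes the same idea through the `cache_page(tag=...)` and `clear_cache(tag=...)` decorators used in the blueprints below.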
## Blueprints
- `bp/market/` - Market root (navigation, category listing)
- `bp/browse/` - Product browsing with filters and infinite scroll
- `bp/product/` - Product detail pages
- `bp/api/` - Product sync API (used by scraper)
## Development

```bash
# Install dependencies
pip install -r requirements.txt

# Set environment variables (simple KEY=value lines only; values with spaces need a proper loader)
export $(grep -v '^#' .env | xargs)

# Run migrations
alembic upgrade head

# Scrape products
bash scrape.sh

# Run the dev server
hypercorn app:app --reload --bind 0.0.0.0:8001
```

## Scraping

```bash
# Full scrape (max 50 pages, 200k products, 8 concurrent)
bash scrape.sh

# Test scraping
bash scrape-test.sh
```

## Docker

```bash
docker build -t market .
docker run -p 8001:8000 --env-file .env market
```

## Environment Variables

```bash
DATABASE_URL_ASYNC=postgresql+asyncpg://user:pass@localhost/coop
REDIS_URL=redis://localhost:6379/0
SECRET_KEY=your-secret-key
SUMA_USER=your-suma-username
SUMA_PASSWORD=your-suma-password
APP_URL_COOP=http://localhost:8000
APP_URL_MARKET=http://localhost:8001
APP_URL_CART=http://localhost:8002
```

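A missing variable from the list above tends to surface as an opaque connection error at first request. A small startup check (illustrative, not part of this commit) fails fast with the names of the gaps instead:

```python
import os

# Variables the app cannot run without (subset of the README list)
REQUIRED = [
    "DATABASE_URL_ASYNC", "REDIS_URL", "SECRET_KEY",
    "SUMA_USER", "SUMA_PASSWORD",
]

def missing_env(env=os.environ):
    # Return the names of required variables that are absent or empty,
    # in declaration order, so the error message is deterministic.
    return [name for name in REQUIRED if not env.get(name)]

# Example: probing a partial environment
probe = {"SECRET_KEY": "dev"}
gaps = missing_env(probe)
if gaps:
    message = "missing env vars: " + ", ".join(gaps)
```

Calling something like this at the top of `create_app()` would turn a late runtime failure into an immediate, readable one.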
app.py (new file, 54 lines)

@@ -0,0 +1,54 @@
from __future__ import annotations
from quart import Quart, g
from shared.factory import create_base_app
from config import config
from suma_browser.app.bp import register_market_bp
async def market_context() -> dict:
"""
Market app context processor.
- menu_items: fetched from coop internal API
- cart_count/cart_total: fetched from cart internal API
"""
from shared.context import base_context
from shared.internal_api import get as api_get, dictobj
ctx = await base_context()
# Menu items from coop API (wrapped for attribute access in templates)
menu_data = await api_get("coop", "/internal/menu-items")
ctx["menu_items"] = dictobj(menu_data) if menu_data else []
# Cart data from cart API
cart_data = await api_get("cart", "/internal/cart/summary", forward_session=True)
if cart_data:
ctx["cart_count"] = cart_data.get("count", 0)
ctx["cart_total"] = cart_data.get("total", 0)
else:
ctx["cart_count"] = 0
ctx["cart_total"] = 0
return ctx
def create_app() -> Quart:
app = create_base_app("market", context_fn=market_context)
# Market blueprint at root (was /market in monolith)
app.register_blueprint(
register_market_bp(
url_prefix="/",
title=config()["coop_title"],
),
url_prefix="/",
)
return app
app = create_app()

bp/__init__.py (new file, empty)

bp/api/__init__.py (new file, empty)

bp/api/routes.py (new file, 414 lines)

@@ -0,0 +1,414 @@
# products_api_async.py
from __future__ import annotations
from datetime import datetime, timezone
from decimal import Decimal
from typing import Any, Dict, List, Tuple, Iterable, Optional
from quart import Blueprint, request, jsonify, g
from sqlalchemy import select
from sqlalchemy.ext.asyncio import AsyncSession
from sqlalchemy.orm import selectinload
from scrape.persist_snapshot.log_product_result import _log_product_result
from scrape.persist_snapshot.save_nav import _save_nav
from scrape.persist_snapshot.capture_listing import _capture_listing
from scrape.persist_snapshot.save_subcategory_redirects import _save_subcategory_redirects
# ORM models (names match models/market.py)
from models.market import (
Product,
ProductImage,
ProductSection,
ProductLabel,
ProductSticker,
ProductAttribute,
ProductNutrition,
ProductAllergen,
)
from suma_browser.app.redis_cacher import clear_cache
from suma_browser.app.csrf import csrf_exempt
products_api = Blueprint("products_api", __name__, url_prefix="/api/products")
# ---- Comparison config ------------------------------------------------------
PRODUCT_FIELDS: List[str] = [
"slug",
"title",
"image",
"description_short",
"description_html",
"suma_href",
"brand",
"rrp", "rrp_currency", "rrp_raw",
"price_per_unit", "price_per_unit_currency", "price_per_unit_raw",
"special_price", "special_price_currency", "special_price_raw",
"regular_price", "regular_price_currency", "regular_price_raw",
"oe_list_price",
"case_size_count", "case_size_item_qty", "case_size_item_unit", "case_size_raw",
"ean", "sku", "unit_size", "pack_size",
]
# rel_name -> (Model, fields_to_compare, key_for_orderless_compare)
CHILD_SPECS: Dict[str, Tuple[Any, List[str], str]] = {
"images": (ProductImage, ["url", "position", "kind"], "url"),
"sections": (ProductSection, ["title", "html"], "title"),
"labels": (ProductLabel, ["name"], "name"),
"stickers": (ProductSticker, ["name"], "name"),
"attributes": (ProductAttribute, ["key", "value"], "key"),
"nutrition": (ProductNutrition, ["key", "value", "unit"], "key"),
"allergens": (ProductAllergen, ["name", "contains"], "name"),
}
def _now_utc():
return datetime.now(timezone.utc)
def _norm_scalar(v: Any) -> Any:
if isinstance(v, Decimal):
s = format(v.normalize(), "f")
return "0" if s in ("-0", "-0.0") else s
if isinstance(v, bool):
return bool(v)
if isinstance(v, (int, float, str)) or v is None:
return v
return str(v)
def _normalize_row(obj: Dict[str, Any], keep: List[str]) -> Dict[str, Any]:
out: Dict[str, Any] = {}
for f in keep:
val = obj.get(f)
if isinstance(val, str):
val = val.strip()
out[f] = _norm_scalar(val)
return out
def _list_to_index(items: Iterable[Dict[str, Any]], uniq: str) -> Dict[Any, Dict[str, Any]]:
ix: Dict[Any, Dict[str, Any]] = {}
for it in items or []:
key = it.get(uniq)
if key is None:
continue
ix[key] = it
return ix
def _serialize_product_for_compare(p: Product) -> Dict[str, Any]:
root: Dict[str, Any] = {f: _norm_scalar(getattr(p, f)) for f in PRODUCT_FIELDS}
for rel_name, (_Model, fields, uniq) in CHILD_SPECS.items():
rows: List[Dict[str, Any]] = []
for child in getattr(p, rel_name) or []:
rows.append({f: _norm_scalar(getattr(child, f)) for f in fields})
root[rel_name] = _list_to_index(rows, uniq)
return root
def _serialize_payload_for_compare(payload: Dict[str, Any]) -> Dict[str, Any]:
root = _normalize_row(payload, PRODUCT_FIELDS)
for rel_name, (_Model, fields, uniq) in CHILD_SPECS.items():
rows = payload.get(rel_name) or []
rows = [r for r in rows if isinstance(r, dict)]
root[rel_name] = _list_to_index([_normalize_row(r, fields) for r in rows], uniq)
return root
from decimal import Decimal, InvalidOperation
def _is_numeric_like(x) -> bool:
if isinstance(x, bool):
return False
if isinstance(x, (int, float, Decimal)):
return True
if isinstance(x, str):
s = x.strip()
if not s:
return False
try:
Decimal(s)
return True
except InvalidOperation:
return False
return False
def _to_decimal(x) -> Decimal:
if isinstance(x, Decimal):
return x
if isinstance(x, bool) or x is None:
raise InvalidOperation
if isinstance(x, (int, str)):
return Decimal(str(x).strip())
if isinstance(x, float):
return Decimal(str(x)) # avoid float fp artifacts
# last resort: string-coerce
return Decimal(str(x).strip())
def values_different(av, bv) -> bool:
# match original None semantics first
if bv is None:
return av is not None
if av is None:
return True
if _is_numeric_like(bv):
try:
return _to_decimal(av) != _to_decimal(bv)
except InvalidOperation:
# av isn't numeric-parsable → different
return True
else:
# non-numeric: compare as strings (like original)
return f"{av}" != f"{bv}"
import re
_cf_a_re = re.compile(r'<a[^>]+/cdn-cgi/l/email-protection#[^"]+"[^>]*>(.*?)</a>', re.I | re.S)
_cf_span_re = re.compile(r'<span[^>]*class="__cf_email__"[^>]*>(.*?)</span>', re.I | re.S)
_cf_data_attr_re = re.compile(r'\sdata-cfemail="[^"]+"', re.I)
_ws_re = re.compile(r'\s+')
def normalize_cf_email(html: str) -> str:
if not isinstance(html, str):
return html
s = html
# Replace CF spans with their inner text
s = _cf_span_re.sub(r'\1', s)
# Replace CF protection anchors with their inner text
s = _cf_a_re.sub(r'\1', s)
# Drop the data-cfemail attribute if any remains
s = _cf_data_attr_re.sub('', s)
# Optional: collapse whitespace
s = _ws_re.sub(' ', s).strip()
return s
def _deep_equal(a: Dict[str, Any], b: Dict[str, Any]) -> bool:
# keys must match at this level
if a.keys() != b.keys():
return False
for k in a.keys():
av, bv = a[k], b[k]
# Dicts: recurse, but don't return early unless it's False
if isinstance(av, dict) and isinstance(bv, dict):
if not _deep_equal(av, bv):
# log_diff(k, av, bv) # optional
return False
continue
# Lists/Tuples: compare length then elements (order-sensitive here)
if isinstance(av, (list, tuple)) and isinstance(bv, (list, tuple)):
if len(av) != len(bv):
# log_diff(k, av, bv)
return False
for i, (ai, bi) in enumerate(zip(av, bv)):
# nested dicts within lists
if isinstance(ai, dict) and isinstance(bi, dict):
if not _deep_equal(ai, bi):
return False
else:
if values_different(normalize_cf_email(ai), normalize_cf_email(bi)):
return False
continue
# Scalars / everything else
if values_different(normalize_cf_email(av), normalize_cf_email(bv)):
# print('!!deep', k, av, bv)
return False
return True
# ---- Mutation helpers -------------------------------------------------------
def _apply_product_fields(p: Product, payload: Dict[str, Any]) -> None:
for f in PRODUCT_FIELDS:
setattr(p, f, payload.get(f))
p.updated_at = _now_utc()
def _replace_children(p: Product, payload: Dict[str, Any]) -> None:
# replace each relation wholesale (delete-orphan takes care of removal)
#p.images.clear()
for row in payload.get("images") or []:
p.images.append(ProductImage(
url=row.get("url"),
position=row.get("position") or 0,
kind=row.get("kind") or "gallery",
created_at=_now_utc(), updated_at=_now_utc(),
))
#p.sections.clear()
for row in payload.get("sections") or []:
p.sections.append(ProductSection(
title=row.get("title") or "",
html=row.get("html") or "",
created_at=_now_utc(), updated_at=_now_utc(),
))
#p.labels.clear()
for row in payload.get("labels") or []:
p.labels.append(ProductLabel(
name=row.get("name") or "",
created_at=_now_utc(), updated_at=_now_utc(),
))
#p.stickers.clear()
for row in payload.get("stickers") or []:
p.stickers.append(ProductSticker(
name=row.get("name") or "",
created_at=_now_utc(), updated_at=_now_utc(),
))
#p.attributes.clear()
for row in payload.get("attributes") or []:
p.attributes.append(ProductAttribute(
key=row.get("key") or "",
value=row.get("value"),
created_at=_now_utc(), updated_at=_now_utc(),
))
#p.nutrition.clear()
for row in payload.get("nutrition") or []:
p.nutrition.append(ProductNutrition(
key=row.get("key") or "",
value=row.get("value"),
unit=row.get("unit"),
created_at=_now_utc(), updated_at=_now_utc(),
))
#p.allergens.clear()
for row in payload.get("allergens") or []:
p.allergens.append(ProductAllergen(
name=row.get("name") or "",
contains=bool(row.get("contains", False)),
created_at=_now_utc(), updated_at=_now_utc(),
))
async def _create_product_from_payload(session: AsyncSession, payload: Dict[str, Any]) -> Product:
p = Product()
_apply_product_fields(p, payload)
p.created_at = _now_utc()
p.deleted_at = None
session.add(p)
#await session.flush() # get p.id
_replace_children(p, payload)
await session.flush()
return p
# ---- API --------------------------------------------------------------------
@csrf_exempt
@products_api.post("/listing/")
@clear_cache(tag='browse')
async def capture_listing():
data: Dict[str, Any] = await request.get_json(force=True, silent=False)
url = data['url']
items = data['items']
total_pages = data['total_pages']
await _capture_listing(g.s, url, items, total_pages)
return {"ok": True}
@csrf_exempt
@products_api.post("/log/")
@clear_cache(tag='browse')
async def log_product():
data: Dict[str, Any] = await request.get_json(force=True, silent=False)
ok = bool(data["ok"])
payload = data.get("payload") or {}
try:
await _log_product_result(g.s, ok, payload)
return {"ok": True}
except Exception:
return {"ok": False}
@csrf_exempt
@products_api.post("/redirects/")
@clear_cache(tag='browse')
async def redirects():
data: Dict[str, str] = await request.get_json(force=True, silent=False)
await _save_subcategory_redirects(g.s, data)
return {"ok": True}
@csrf_exempt
@products_api.post("/nav/")
@clear_cache(tag='browse')
async def save_nav():
data: Dict[str, Any] = await request.get_json(force=True, silent=False)
await _save_nav(g.s, data)
return {"ok": True}
@csrf_exempt
@products_api.post("/sync/")
@clear_cache(tag='browse')
async def sync_product():
"""
POST /api/products/sync
Body includes top-level fields and child arrays like:
{
"slug": "my-product",
"title": "...",
"images": [{"url":"https://..","position":0,"kind":"gallery"}],
"sections": [{"title":"Details","html":"<p>..</p>"}],
"labels": [{"name":"Vegan"}],
"stickers": [{"name":"Sale"}],
"attributes": [{"key":"Country","value":"UK"}],
"nutrition": [{"key":"Energy","value":"100","unit":"kcal"}],
"allergens": [{"name":"Nuts","contains":true}]
}
"""
payload = await request.get_json(force=True, silent=False)
if not isinstance(payload, dict):
return jsonify({"error": "Invalid JSON"}), 400
slug = payload.get("slug")
if not isinstance(slug, str) or not slug:
return jsonify({"error": "Missing 'slug'"}), 400
# find undeleted row by slug
#stmt = select(Product).where(Product.slug == slug, Product.deleted_at.is_(None))
stmt = (
select(Product)
.where(Product.slug == slug, Product.deleted_at.is_(None))
.options(
selectinload(Product.images),
selectinload(Product.sections),
selectinload(Product.labels),
selectinload(Product.stickers),
selectinload(Product.attributes),
selectinload(Product.nutrition),
selectinload(Product.allergens),
)
)
existing: Optional[Product] = (await g.s.execute(stmt)).scalars().first()
incoming_norm = _serialize_payload_for_compare(payload)
if existing:
db_norm = _serialize_product_for_compare(existing)
if _deep_equal(db_norm, incoming_norm):
# Exactly equal → just touch updated_at
existing.updated_at = _now_utc()
await g.s.flush()
return jsonify({"id": existing.id, "action": "touched"}), 200
# Different → soft delete old + create a new row
existing.deleted_at = _now_utc()
await g.s.flush() # ensure the soft-delete is persisted before inserting the new row
new_p = await _create_product_from_payload(g.s, payload)
await g.s.flush()
return jsonify({"id": new_p.id, "action": "replaced"}), 201
# Not found → create
new_p = await _create_product_from_payload(g.s, payload)
await g.s.flush()
return jsonify({"id": new_p.id, "action": "created"}), 201
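The sync endpoint's change detection hinges on `values_different` above: when the stored value looks numeric, comparison goes through `Decimal`, so `"1.50"` and `1.5` do not register as a change and trigger a needless soft-delete-and-replace. A small standalone illustration of that rule (a simplified mirror, not the function itself):

```python
from decimal import Decimal, InvalidOperation

def differs(av, bv) -> bool:
    # Simplified mirror of values_different: None-aware, numeric when
    # both sides parse as Decimal, string comparison otherwise.
    if bv is None:
        return av is not None
    if av is None:
        return True
    try:
        return Decimal(str(av).strip()) != Decimal(str(bv).strip())
    except InvalidOperation:
        return f"{av}" != f"{bv}"

same_price = differs("1.50", 1.5)    # False: Decimal("1.50") == Decimal("1.5")
new_price = differs("1.50", "1.55")  # True: numerically different
```

This is why the endpoint can answer `"touched"` for a re-scrape whose payload only differs in numeric formatting.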

bp/browse/__init__.py (new file, 7 lines)

@@ -0,0 +1,7 @@
from __future__ import annotations
# routes.py defines register(), which builds and returns the browse blueprint
from .routes import register  # noqa: F401

bp/browse/routes.py (new file, 162 lines)

@@ -0,0 +1,162 @@
from __future__ import annotations
from quart import (
g,
Blueprint,
abort,
render_template,
render_template_string,
make_response,
current_app,
)
from config import config
from .services.nav import category_context, get_nav
from .services.blacklist.category import is_category_blocked
from .services import (
_hx_fragment_request,
_productInfo,
_vary,
_current_url_without_page,
)
from suma_browser.app.redis_cacher import cache_page
from suma_browser.app.utils.htmx import is_htmx_request
def register():
browse_bp = Blueprint("browse", __name__)
from .. import register_product
browse_bp.register_blueprint(
register_product(),
)
@browse_bp.get("/")
@cache_page(tag="browse")
async def home():
"""
Market landing page.
Shows the Ghost CMS post with slug='market'.
"""
from shared.internal_api import get as api_get
# Fetch the market post from coop internal API
p_data = await api_get("coop", "/internal/post/market")
if not p_data:
abort(404)
# Determine which template to use based on request type
if not is_htmx_request():
# Normal browser request: full page with layout
html = await render_template("_types/market/index.html", **p_data)
else:
# HTMX request: main panel + OOB elements
html = await render_template("_types/market/_oob_elements.html", **p_data)
return await make_response(html)
@browse_bp.get("/all/")
@cache_page(tag="browse")
async def browse_all():
"""
Browse all products across all categories.
Renders full page or just product cards (HTMX pagination fragment).
"""
nav = await get_nav(g.s)
ctx = {
"category_label": "All Products",
"top_slug": "all",
"sub_slug": None,
}
product_info = await _productInfo()
full_context = {**product_info, **ctx}
# Determine which template to use based on request type and pagination
if not is_htmx_request():
# Normal browser request: full page with layout
html = await render_template("_types/browse/index.html", **full_context)
elif product_info["page"] > 1:
# HTMX pagination: just product cards + sentinel
html = await render_template("_types/browse/_product_cards.html", **product_info)
else:
# HTMX navigation (page 1): main panel + OOB elements
html = await render_template("_types/browse/_oob_elements.html", **full_context)
resp = await make_response(html)
resp.headers["Hx-Push-Url"] = _current_url_without_page()
return _vary(resp)
@browse_bp.get("/<top_slug>/")
@cache_page(tag="browse")
async def browse_top(top_slug: str):
"""
Browse by top-level category (e.g. /fruit).
404 if category not in allowed list or is blocked.
"""
REVERSE_CATEGORY = {v: k for k, v in config()["categories"]["allow"].items()}
if top_slug not in REVERSE_CATEGORY:
abort(404)
if is_category_blocked(top_slug):
abort(404)
nav = await get_nav(g.s)
ctx = category_context(top_slug, None, nav)
product_info = await _productInfo(top_slug)
full_context = {**product_info, **ctx}
# Determine which template to use based on request type and pagination
if not is_htmx_request():
# Normal browser request: full page with layout
html = await render_template("_types/browse/index.html", **full_context)
elif product_info["page"] > 1:
# HTMX pagination: just product cards + sentinel
html = await render_template("_types/browse/_product_cards.html", **product_info)
else:
html = await render_template("_types/browse/_oob_elements.html", **full_context)
resp = await make_response(html)
resp.headers["Hx-Push-Url"] = _current_url_without_page()
return _vary(resp)
@browse_bp.get("/<top_slug>/<sub_slug>/")
@cache_page(tag="browse")
async def browse_sub(top_slug: str, sub_slug: str):
"""
Browse by subcategory (e.g. /fruit/citrus).
404 if blocked or unknown.
"""
REVERSE_CATEGORY = {v: k for k, v in config()["categories"]["allow"].items()}
if top_slug not in REVERSE_CATEGORY:
abort(404)
if is_category_blocked(top_slug, sub_slug):
abort(404)
nav = await get_nav(g.s)
ctx = category_context(top_slug, sub_slug, nav)
product_info = await _productInfo(top_slug, sub_slug)
full_context = {**product_info, **ctx}
# Determine which template to use based on request type and pagination
if not is_htmx_request():
# Normal browser request: full page with layout
html = await render_template("_types/browse/index.html", **full_context)
elif product_info["page"] > 1:
# HTMX pagination: just product cards + sentinel
html = await render_template("_types/browse/_product_cards.html", **product_info)
else:
# HTMX navigation (page 1): main panel + OOB elements
html = await render_template("_types/browse/_oob_elements.html", **full_context)
resp = await make_response(html)
resp.headers["Hx-Push-Url"] = _current_url_without_page()
return _vary(resp)
return browse_bp
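All three browse routes repeat the same three-way template choice: full page for a plain browser request, product cards only for HTMX pagination, and an out-of-band fragment for HTMX navigation. That decision could be pulled into one helper — a hypothetical refactor, not code in this commit:

```python
def pick_template(section: str, is_htmx: bool, page: int) -> str:
    """Choose the template for a browse-style route.

    - plain browser request   -> full page with layout
    - HTMX request, page > 1  -> just the product cards (infinite scroll)
    - HTMX request, page 1    -> main panel plus out-of-band elements
    """
    if not is_htmx:
        return f"_types/{section}/index.html"
    if page > 1:
        return f"_types/{section}/_product_cards.html"
    return f"_types/{section}/_oob_elements.html"
```

With this, `home`, `browse_all`, `browse_top`, and `browse_sub` would each collapse to a single `render_template(pick_template(...), **ctx)` call.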


@@ -0,0 +1,13 @@
from __future__ import annotations
from quart import Blueprint
from .services import (
_hx_fragment_request,
_productInfo,
_order_brands_selected_first,
_massage_product,
_vary,
_current_url_without_page,
_is_liked
)


@@ -0,0 +1,12 @@
# suma_browser/category_blacklist.py
from __future__ import annotations
from typing import Optional
from config import config
def _norm(s: str) -> str:
return (s or "").strip().lower().strip("/")
def is_category_blocked(top_slug: str, sub_slug: Optional[str] = None) -> bool:
if sub_slug:
return is_category_blocked(top_slug) or _norm(f"{top_slug}/{sub_slug}") in config()["blacklist"]["category"]
return _norm(top_slug) in config()["blacklist"]["category"]


@@ -0,0 +1,15 @@
from typing import Set, Optional
from ..slugs import canonical_html_slug
from config import config
_blocked: Set[str] = set()
_mtime: Optional[float] = None
def _norm(slug: str) -> str:
slug = (slug or "").strip().strip("/").lower()
if slug.startswith("product/"):
slug = slug.split("/", 1)[1]
return canonical_html_slug(slug)
def is_product_blocked(slug: str) -> bool:
return _norm(slug) in config()["blacklist"]["product"]


@@ -0,0 +1,11 @@
import re
from config import config
def _norm_title_key(t: str) -> str:
t = (t or "").strip().lower()
t = re.sub(r":\s*$", "", t)
t = re.sub(r"\s+", " ", t)
return t
def is_blacklisted_heading(title: str) -> bool:
return _norm_title_key(title) in [s.lower() for s in config()["blacklist"]["product-details"]]


@@ -0,0 +1,367 @@
from __future__ import annotations
import os, json
from typing import List, Optional
from config import config
from .blacklist.product import is_product_blocked
def _json(path: str):
with open(path, "r", encoding="utf-8") as f:
return json.load(f)
def fs_nav():
path = os.path.join(config()["cache"]["fs_root"], "nav.json")
return _json(path)
def _brand_of(item: dict) -> str:
b = (item.get("brand") or "").strip()
if b:
return b
try:
return (item.get("info_table", {}).get("Brand") or "").strip()
except Exception:
return ""
def _stickers_of(item: dict) -> List[str]:
vals = item.get("stickers") or []
out = []
for v in vals:
s = (str(v) or "").strip().lower()
if s:
out.append(s)
return out
def fs_product_by_slug(slug: str):
slug = (slug or "").strip()
if slug.endswith(".json"):
path = os.path.join(config()["cache"]["fs_root"], "products", slug)
else:
path = os.path.join(config()["cache"]["fs_root"], "products", f"{slug}.json")
return _json(path)
def fs_count_products_in_sub(top_slug: str, sub_slug: Optional[str]) -> int:
"""
Return how many products are in the listing for (top_slug, sub_slug),
after filtering out blocked products.
If sub_slug is None, that's the top-level category listing.
"""
fs_root = config()["cache"]["fs_root"]
# Build path to listings/.../items.json just like fs_products does
parts = ["listings", top_slug]
if sub_slug:
parts.append(sub_slug)
parts.append("items.json")
path = os.path.join(fs_root, *parts)
if not os.path.exists(path):
return 0
try:
all_slugs = _json(path)
except Exception:
return 0
# Filter out blocked products
allowed = [
slug for slug in all_slugs
if not is_product_blocked(slug)
]
return len(allowed)
def fs_products(
top_slug: str | None,
sub_slug: str | None,
selected_brands: Optional[List[str]] = None,
selected_stickers: Optional[List[str]] = None,
selected_labels: Optional[List[str]] = None,
page: int = 1,
search: Optional[str] = None,
sort: Optional[str] = None,
page_size: int = 20,
# NEW: only include products the current user has liked
liked_slugs: Optional[List[str]] = None,
liked: bool = False,
):
"""
Returns:
{
"total_pages": int,
"items": [product dict ...], # filtered + paginated (sorted)
"brands": [{"name": str, "count": int}],
"stickers": [{"name": str, "count": int}],
"labels": [{"name": str, "count": int}],
}
Filters:
- top_slug / sub_slug scope
- selected_brands
- selected_stickers
- selected_labels
- search
- liked_slugs (if provided)
"""
import os
from typing import List, Dict
fs_root = config()["cache"]["fs_root"]
# ---------- Collect slugs ----------
slugs: List[str] = []
if top_slug: # normal listing path
parts = ["listings", top_slug]
if sub_slug:
parts.append(sub_slug)
parts.append("items.json")
path = os.path.join(fs_root, *parts)
if os.path.exists(path):
try:
slugs = [s for s in _json(path) if not is_product_blocked(s)]
except Exception:
slugs = []
else:
# No top slug: include ALL products from /products/*.json
products_dir = os.path.join(fs_root, "products")
try:
for fname in os.listdir(products_dir):
if not fname.endswith(".json"):
continue
slug = fname[:-5] # strip .json
if not is_product_blocked(slug):
slugs.append(slug)
except FileNotFoundError:
slugs = []
# ---------- Load product dicts ----------
all_items: List[dict] = []
for slug in slugs:
try:
item = fs_product_by_slug(slug)
if isinstance(item, dict):
all_items.append(item)
except Exception:
continue
# Stable deterministic ordering when aggregating everything (name ASC)
def _title_key(it: dict) -> tuple:
title = (it.get("title") or it.get("name") or it.get("slug") or "").strip().lower()
return (title, it.get("slug") or "")
all_items.sort(key=_title_key)
# ---------- Helpers for filters & counts ----------
def _brand_of_local(item: dict) -> str:
b = item.get("brand") or (item.get("info_table") or {}).get("Brand")
return (b or "").strip()
def _stickers_of_local(item: dict) -> List[str]:
vals = item.get("stickers") or []
out = []
for s in vals:
if isinstance(s, str):
s2 = s.strip().lower()
if s2:
out.append(s2)
return out
def _labels_of_local(item: dict) -> List[str]:
vals = item.get("labels") or []
out = []
for s in vals:
if isinstance(s, str):
s2 = s.strip().lower()
if s2:
out.append(s2)
return out
sel_brands = [
(s or "").strip().lower()
for s in (selected_brands or [])
if (s or "").strip()
]
sel_stickers = [
(s or "").strip().lower()
for s in (selected_stickers or [])
if (s or "").strip()
]
sel_labels = [
(s or "").strip().lower()
for s in (selected_labels or [])
if (s or "").strip()
]
search_q = (search or "").strip().lower() or None
liked_set = {
(slug or "").strip().lower()
for slug in (liked_slugs or [] if liked else [])
if (slug or "").strip()
}
real_liked_set = {
(slug or "").strip().lower()
for slug in (liked_slugs or [])
if (slug or "").strip()
}
def matches_brand(item: dict) -> bool:
if not sel_brands:
return True
return _brand_of_local(item).strip().lower() in sel_brands
def has_all_selected_stickers(item: dict) -> bool:
if not sel_stickers:
return True
tags = set(_stickers_of_local(item))
return all(s in tags for s in sel_stickers)
def has_all_selected_labels(item: dict) -> bool:
if not sel_labels:
return True
tags = set(_labels_of_local(item))
return all(s in tags for s in sel_labels)
def matches_search(item: dict) -> bool:
if not search_q:
return True
desc = (item.get("description_short") or "").strip().lower()
return search_q in desc
def is_liked(item: dict) -> bool:
"""
True if this item should be shown under the liked filter.
If liked_set is empty, treat everything as allowed.
"""
slug_val = (item.get("slug") or "").strip().lower()
return slug_val in real_liked_set
# ---------- Facet counts (computed over all items in scope, before filtering) ----------
brand_counts: Dict[str, int] = {}
for b in (selected_brands or []):
brand_counts[b] = 0
for it in all_items:
b = _brand_of_local(it)
if not b:
continue
brand_counts[b] = brand_counts.get(b, 0) + 1
sticker_counts: Dict[str, int] = {}
for s in (selected_stickers or []):
sticker_counts[s] = 0
for it in all_items:
for s in _stickers_of_local(it):
sticker_counts[s] = sticker_counts.get(s, 0) + 1
label_counts: Dict[str, int] = {}
for s in (selected_labels or []):
label_counts[s] = 0
for it in all_items:
for s in _labels_of_local(it):
label_counts[s] = label_counts.get(s, 0) + 1
liked_count = 0
for it in all_items:
if is_liked(it):
liked_count += 1
search_count = 0
for it in all_items:
if matches_search(it):
search_count += 1
# ---------- Apply filters ----------
filtered = [
it
for it in all_items
if matches_brand(it)
and has_all_selected_stickers(it)
and has_all_selected_labels(it)
and matches_search(it)
and (not liked or is_liked(it))
]
# ---------- Sorting ----------
sort_mode = (sort or "az").strip().lower()
def _price_key(item: dict):
p = item["regular_price"]
title, slug = _title_key(item)
return (0 if p is not None else 1, p if p is not None else 0, title, slug)
def _price_key_desc(item: dict):
p = item["regular_price"]
title, slug = _title_key(item)
return (
0 if p is not None else 1,
-(p if p is not None else 0),
title,
slug,
)
if sort_mode in ("az",):
filtered.sort(key=_title_key)
elif sort_mode in ("za",):
filtered.sort(key=_title_key, reverse=True)
elif sort_mode in (
"price-asc", "price_asc", "price-low", "price-low-high", "low-high", "lo-hi"
):
filtered.sort(key=_price_key)
elif sort_mode in (
"price-desc", "price_desc", "price-high", "price-high-low", "high-low", "hi-lo"
):
filtered.sort(key=_price_key_desc)
else:
filtered.sort(key=_title_key)
# ---------- Pagination ----------
total_pages = max(1, (len(filtered) + page_size - 1) // page_size)
page = max(1, page)
start = (page - 1) * page_size
end = start + page_size
page_items = filtered[start:end]
# ---------- Format counts lists ----------
brands_list = sorted(
[{"name": k, "count": v} for k, v in brand_counts.items()],
key=lambda x: (-x["count"], x["name"].lower()),
)
stickers_list = sorted(
[{"name": k, "count": v} for k, v in sticker_counts.items()],
key=lambda x: (-x["count"], x["name"]),
)
labels_list = sorted(
[{"name": k, "count": v} for k, v in label_counts.items()],
key=lambda x: (-x["count"], x["name"]),
)
return {
"total_pages": total_pages,
"items": page_items,
"brands": brands_list,
"stickers": stickers_list,
"labels": labels_list,
"liked_count": liked_count,
"search_count": search_count
}
# async wrappers (unchanged)
async def read_nav():
return fs_nav()
async def read_listing(top_slug: str, sub_slug: str | None, page: int):
return fs_products(top_slug, sub_slug, None, None, page)
async def read_product(slug_or_path: str):
slug = (slug_or_path or "").strip()
if "/" in slug:
slug = slug.rsplit("/", 1)[-1]
slug = slug.split("?", 1)[0]
return fs_product_by_slug(slug)
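The tuple-based sort keys above push items without a price to the end while sorting priced items normally. A standalone sketch of the same pattern (names here are illustrative, not from the module):

```python
# None-last price sorting: unpriced items get a sort rank of 1 so they sink
# below every priced item; title breaks ties among equal prices.
def price_key(item: dict):
    p = item.get("regular_price")
    return (0 if p is not None else 1, p if p is not None else 0, item.get("title", "").lower())

items = [
    {"title": "Beans", "regular_price": None},
    {"title": "Apples", "regular_price": 2.5},
    {"title": "Oats", "regular_price": 1.0},
]
print([i["title"] for i in sorted(items, key=price_key)])  # → ['Oats', 'Apples', 'Beans']
```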


@@ -0,0 +1,657 @@
from __future__ import annotations
from typing import Dict, List, Optional
from sqlalchemy import select, and_
from sqlalchemy.orm import selectinload
from config import config
# ORM models
from models.market import (
Product, ProductImage, ProductSection,
Listing, ListingItem,
NavTop, NavSub,
ProductSticker, ProductLabel,
ProductAttribute, ProductNutrition, ProductAllergen, ProductLike
)
from sqlalchemy import func, case
# ---------- helpers ----------
def _regular_price_of(p: Product) -> Optional[float]:
try:
return (
float(p.regular_price)
if p.regular_price is not None
else (
float(p.special_price)
if p.special_price is not None
else None
)
)
except Exception:
return None
# ---------- NAV ----------
async def db_nav(session) -> Dict:
tops = (await session.execute(select(NavTop))).scalars().all()
subs = (await session.execute(select(NavSub))).scalars().all()
subs_by_top: Dict[int, List[Dict]] = {}
for s in subs:
sub_name = (s.label or s.slug or "").strip()
subs_by_top.setdefault(s.top_id, []).append({
"label": s.label,
"name": sub_name, # back-compat for callers expecting "name"
"slug": s.slug,
"href": s.href,
})
cats: Dict[str, Dict] = {}
for t in tops:
top_label = (t.label or t.slug or "").strip()
cats[top_label] = {
"label": t.label,
"name": top_label, # back-compat
"slug": t.slug,
"subs": sorted(subs_by_top.get(t.id, []), key=lambda x: (x["name"] or "").lower()),
}
return {"cats": cats}
async def db_product_full(session, slug: str, user_id=0) -> Optional[dict]:
liked_product_ids_subq = (
select(ProductLike.product_slug)
.where(
and_(
ProductLike.user_id == user_id,
ProductLike.deleted_at.is_(None)
)
)
)
is_liked_case = case(
(and_(
(Product.slug.in_(liked_product_ids_subq)),
Product.deleted_at.is_(None)
), True),
else_=False
).label("is_liked")
q = (
select(Product, is_liked_case)
.where(Product.slug == slug, Product.deleted_at.is_(None))
.options(
selectinload(Product.images.and_(ProductImage.deleted_at.is_(None))),
selectinload(Product.sections.and_(ProductSection.deleted_at.is_(None))),
selectinload(Product.labels.and_(ProductLabel.deleted_at.is_(None))),
selectinload(Product.stickers.and_(ProductSticker.deleted_at.is_(None))),
selectinload(Product.attributes.and_(ProductAttribute.deleted_at.is_(None))),
selectinload(Product.nutrition.and_(ProductNutrition.deleted_at.is_(None))),
selectinload(Product.allergens.and_(ProductAllergen.deleted_at.is_(None))),
)
)
result = await session.execute(q)
row = result.first()
p, is_liked = row if row else (None, None)
if not p:
return None
gallery = [
img.url
for img in sorted(p.images, key=lambda i: (i.kind or "gallery", i.position or 0))
if (img.kind or "gallery") == "gallery"
]
embedded = [
img.url
for img in sorted(p.images, key=lambda i: i.position or 0)
if (img.kind or "") == "embedded"
]
all_imgs = [
img.url
for img in sorted(p.images, key=lambda i: i.position or 0)
if (img.kind or "") == "all"
]
return {
"id": p.id,
"slug": p.slug,
"title": p.title,
"brand": p.brand,
"image": p.image,
"description_short": p.description_short,
"description_html": p.description_html,
"suma_href": p.suma_href,
"rrp": float(p.rrp) if p.rrp is not None else None,
"special_price": float(p.special_price) if p.special_price is not None else None,
"special_price_raw": p.special_price_raw,
"special_price_currency": p.special_price_currency,
"regular_price": _regular_price_of(p),
"regular_price_raw": p.regular_price_raw,
"regular_price_currency": p.regular_price_currency,
"rrp_raw": p.rrp_raw,
"rrp_currency": p.rrp_currency,
"price_per_unit_raw": p.price_per_unit_raw,
"price_per_unit": p.price_per_unit,
"price_per_unit_currency": p.price_per_unit_currency,
"oe_list_price": p.oe_list_price,
"images": gallery,
"embedded_image_urls": embedded,
"all_image_urls": all_imgs,
"sections": [{"title": s.title, "html": s.html} for s in p.sections],
"stickers": [v.name.strip().lower() for v in p.stickers if v.name],
"labels": [v.name for v in p.labels if v.name],
"ean": p.ean,
"sku": p.sku,
"unit_size": p.unit_size,
"pack_size": p.pack_size,
"case_size_raw": p.case_size_raw,
"case_size_count": p.case_size_count,
"case_size_item_qty": p.case_size_item_qty,
"case_size_item_unit": p.case_size_item_unit,
"info_table": {a.key: a.value for a in p.attributes if a.key},
"nutrition": [{"key": n.key, "value": n.value, "unit": n.unit} for n in p.nutrition if n.key],
"allergens": [{"name": a.name, "contains": a.contains} for a in p.allergens if a.name],
"is_liked": is_liked,
"deleted_at": p.deleted_at
}
async def db_product_full_id(session, id: int, user_id=0) -> Optional[dict]:
liked_product_ids_subq = (
select(ProductLike.product_slug)
.where(
and_(
ProductLike.user_id == user_id,
ProductLike.deleted_at.is_(None)
)
)
)
is_liked_case = case(
(
(Product.slug.in_(liked_product_ids_subq)),
True
),
else_=False
).label("is_liked")
q = (
select(Product, is_liked_case)
.where(Product.id == id)
.options(
selectinload(Product.images.and_(ProductImage.deleted_at.is_(None))),
selectinload(Product.sections.and_(ProductSection.deleted_at.is_(None))),
selectinload(Product.labels.and_(ProductLabel.deleted_at.is_(None))),
selectinload(Product.stickers.and_(ProductSticker.deleted_at.is_(None))),
selectinload(Product.attributes.and_(ProductAttribute.deleted_at.is_(None))),
selectinload(Product.nutrition.and_(ProductNutrition.deleted_at.is_(None))),
selectinload(Product.allergens.and_(ProductAllergen.deleted_at.is_(None))),
)
)
result = await session.execute(q)
row = result.first()
p, is_liked = row if row else (None, None)
if not p:
return None
gallery = [
img.url
for img in sorted(p.images, key=lambda i: (i.kind or "gallery", i.position or 0))
if (img.kind or "gallery") == "gallery"
]
embedded = [
img.url
for img in sorted(p.images, key=lambda i: i.position or 0)
if (img.kind or "") == "embedded"
]
all_imgs = [
img.url
for img in sorted(p.images, key=lambda i: i.position or 0)
if (img.kind or "") == "all"
]
return {
"id": p.id,
"slug": p.slug,
"title": p.title,
"brand": p.brand,
"image": p.image,
"description_short": p.description_short,
"description_html": p.description_html,
"suma_href": p.suma_href,
"rrp": float(p.rrp) if p.rrp is not None else None,
"special_price": float(p.special_price) if p.special_price is not None else None,
"special_price_raw": p.special_price_raw,
"special_price_currency": p.special_price_currency,
"regular_price": _regular_price_of(p),
"regular_price_raw": p.regular_price_raw,
"regular_price_currency": p.regular_price_currency,
"rrp_raw": p.rrp_raw,
"rrp_currency": p.rrp_currency,
"price_per_unit_raw": p.price_per_unit_raw,
"price_per_unit": p.price_per_unit,
"price_per_unit_currency": p.price_per_unit_currency,
"oe_list_price": p.oe_list_price,
"images": gallery,
"embedded_image_urls": embedded,
"all_image_urls": all_imgs,
"sections": [{"title": s.title, "html": s.html} for s in p.sections],
"stickers": [v.name.strip().lower() for v in p.stickers if v.name],
"labels": [v.name for v in p.labels if v.name],
"ean": p.ean,
"sku": p.sku,
"unit_size": p.unit_size,
"pack_size": p.pack_size,
"case_size_raw": p.case_size_raw,
"case_size_count": p.case_size_count,
"case_size_item_qty": p.case_size_item_qty,
"case_size_item_unit": p.case_size_item_unit,
"info_table": {a.key: a.value for a in p.attributes if a.key},
"nutrition": [{"key": n.key, "value": n.value, "unit": n.unit} for n in p.nutrition if n.key],
"allergens": [{"name": a.name, "contains": a.contains} for a in p.allergens if a.name],
"is_liked": is_liked,
"deleted_at": p.deleted_at
}
# ---------- PRODUCTS LISTING ----------
async def db_products_nocounts(
session,
top_slug: str | None,
sub_slug: str | None,
selected_brands: Optional[List[str]] = None,
selected_stickers: Optional[List[str]] = None,
selected_labels: Optional[List[str]] = None,
page: int = 1,
search: Optional[str] = None,
sort: Optional[str] = None,
page_size: int = 20,
liked: Optional[bool] = None,
user_id: int = 0
) -> Dict:
BLOCKED_SLUGS = set((config().get("blacklist", {}).get("product", []) or []))
base_conditions = []
if BLOCKED_SLUGS:
base_conditions.append(
~Product.slug.in_(BLOCKED_SLUGS),
)
if top_slug:
q_list = (
select(Listing.id)
.join(NavTop, Listing.top)
.outerjoin(NavSub, Listing.sub)
.where(
Listing.deleted_at.is_(None),
NavTop.deleted_at.is_(None),
NavTop.slug == top_slug,
NavSub.deleted_at.is_(None),
(NavSub.slug == sub_slug) if sub_slug else Listing.sub_id.is_(None),
)
)
listing_id = (await session.execute(q_list)).scalars().first()
if not listing_id:
return {"total_pages": 1, "items": []}
base_conditions.append(Product.slug.in_(
select(ListingItem.slug).where(ListingItem.listing_id == listing_id, ListingItem.deleted_at.is_(None))
))
base_ids_subq = select(Product.id).where(*base_conditions, Product.deleted_at.is_(None))
base_ids = (await session.execute(base_ids_subq)).scalars().all()
if not base_ids:
return {"total_pages": 1, "items": []}
sel_brands = [(b or "").strip().lower() for b in (selected_brands or []) if (b or "").strip()]
sel_stickers = [(s or "").strip().lower() for s in (selected_stickers or []) if (s or "").strip()]
sel_labels = [(l or "").strip().lower() for l in (selected_labels or []) if (l or "").strip()]
search_q = (search or "").strip().lower()
filter_conditions = []
if sel_brands:
filter_conditions.append(func.lower(Product.brand).in_(sel_brands))
for sticker_name in sel_stickers:
filter_conditions.append(
Product.stickers.any(
and_(
func.lower(ProductSticker.name) == sticker_name,
ProductSticker.deleted_at.is_(None)
)
)
)
for label_name in sel_labels:
filter_conditions.append(
Product.labels.any(
and_(
func.lower(ProductLabel.name) == label_name,
ProductLabel.deleted_at.is_(None),
)
)
)
if search_q:
filter_conditions.append(func.lower(Product.description_short).contains(search_q))
if liked:
liked_subq = (
select(ProductLike.product_slug)
.where(
and_(
ProductLike.user_id == user_id,
ProductLike.deleted_at.is_(None)
)
)
.subquery()
)
filter_conditions.append(Product.slug.in_(liked_subq))
filtered_count_query = select(func.count(Product.id)).where(Product.id.in_(base_ids), *filter_conditions)
total_filtered = (await session.execute(filtered_count_query)).scalars().one()
total_pages = max(1, (total_filtered + page_size - 1) // page_size)
page = max(1, page)
liked_product_slugs_subq = (
select(ProductLike.product_slug)
.where(
and_(
ProductLike.user_id == user_id,
ProductLike.deleted_at.is_(None)
)
)
)
is_liked_case = case(
(Product.slug.in_(liked_product_slugs_subq), True),
else_=False
).label("is_liked")
q_filtered = select(Product, is_liked_case).where(Product.id.in_(base_ids), *filter_conditions).options(
selectinload(Product.images),
selectinload(Product.sections),
selectinload(Product.labels),
selectinload(Product.stickers),
selectinload(Product.attributes),
selectinload(Product.nutrition),
selectinload(Product.allergens),
)
sort_mode = (sort or "az").strip().lower()
if sort_mode == "az":
q_filtered = q_filtered.order_by(func.lower(Product.title), Product.slug)
elif sort_mode == "za":
q_filtered = q_filtered.order_by(func.lower(Product.title).desc(), Product.slug.desc())
elif sort_mode in ("price-asc", "price_asc", "price-low", "price-low-high", "low-high", "lo-hi"):
q_filtered = q_filtered.order_by(
case((Product.regular_price.is_(None), 1), else_=0),
Product.regular_price.asc(),
func.lower(Product.title),
Product.slug
)
elif sort_mode in ("price-desc", "price_desc", "price-high", "price-high-low", "high-low", "hi-lo"):
q_filtered = q_filtered.order_by(
case((Product.regular_price.is_(None), 1), else_=0),
Product.regular_price.desc(),
func.lower(Product.title),
Product.slug
)
else:
q_filtered = q_filtered.order_by(func.lower(Product.title), Product.slug)
offset_val = (page - 1) * page_size
q_filtered = q_filtered.offset(offset_val).limit(page_size)
products_page = (await session.execute(q_filtered)).all()
items: List[Dict] = []
for p, is_liked in products_page:
gallery_imgs = sorted((img for img in p.images), key=lambda i: (i.kind or "gallery", i.position or 0))
gallery = [img.url for img in gallery_imgs if (img.kind or "gallery") == "gallery"]
embedded = [img.url for img in sorted(p.images, key=lambda i: i.position or 0) if (img.kind or "") == "embedded"]
all_imgs = [img.url for img in sorted(p.images, key=lambda i: i.position or 0) if (img.kind or "") == "all"]
items.append({
"slug": p.slug,
"title": p.title,
"brand": p.brand,
"description_short": p.description_short,
"description_html": p.description_html,
"image": p.image,
"rrp": float(p.rrp) if p.rrp is not None else None,
"special_price": float(p.special_price) if p.special_price is not None else None,
"special_price_raw": p.special_price_raw,
"special_price_currency": p.special_price_currency,
"regular_price": _regular_price_of(p),
"regular_price_raw": p.regular_price_raw,
"regular_price_currency": p.regular_price_currency,
"rrp_raw": p.rrp_raw,
"rrp_currency": p.rrp_currency,
"price_per_unit_raw": p.price_per_unit_raw,
"price_per_unit": p.price_per_unit,
"price_per_unit_currency": p.price_per_unit_currency,
"images": gallery,
"embedded_image_urls": embedded,
"all_image_urls": all_imgs,
"sections": [{"title": s.title, "html": s.html} for s in p.sections],
"labels": [l.name for l in p.labels if l.name],
"stickers": [s.name.strip().lower() for s in p.stickers if s.name],
"info_table": {a.key: a.value for a in p.attributes if a.key},
"nutrition": [{"key": n.key, "value": n.value, "unit": n.unit} for n in p.nutrition if n.key],
"allergens": [{"name": a.name, "contains": a.contains} for a in p.allergens if a.name],
"ean": p.ean,
"sku": p.sku,
"unit_size": p.unit_size,
"pack_size": p.pack_size,
"is_liked": is_liked,
})
return {
"total_pages": total_pages,
"items": items,
}
async def db_products_counts(
session,
top_slug: str | None,
sub_slug: str | None,
search: Optional[str] = None,
user_id: int = 0
) -> Dict:
BLOCKED_SLUGS = set((config().get("blacklist", {}).get("product", []) or []))
base_conditions = []
if top_slug:
q_list = select(Listing.id).where(
Listing.deleted_at.is_(None),
Listing.top.has(slug=top_slug),
Listing.sub.has(slug=sub_slug) if sub_slug else Listing.sub_id.is_(None),
)
listing_id = (await session.execute(q_list)).scalars().first()
if not listing_id:
return {
"brands": [],
"stickers": [],
"labels": [],
"liked_count": 0,
"search_count": 0,
}
listing_slug_subquery = select(ListingItem.slug).where(ListingItem.listing_id == listing_id, ListingItem.deleted_at.is_(None))
if BLOCKED_SLUGS:
base_conditions.append(
and_(
Product.slug.in_(listing_slug_subquery),
~Product.slug.in_(BLOCKED_SLUGS),
)
)
else:
base_conditions.append(Product.slug.in_(listing_slug_subquery))
else:
if BLOCKED_SLUGS:
base_conditions.append(~Product.slug.in_(BLOCKED_SLUGS))
base_ids = (await session.execute(select(Product.id).where(*base_conditions, Product.deleted_at.is_(None)))).scalars().all()
if not base_ids:
return {
"brands": [],
"stickers": [],
"labels": [],
"liked_count": 0,
"search_count": 0,
}
brands_list: List[Dict] = []
stickers_list: List[Dict] = []
labels_list: List[Dict] = []
liked_count = (await session.execute(
select(func.count())
.select_from(ProductLike)
.where(
ProductLike.user_id == user_id,
ProductLike.product_slug.in_(
select(Product.slug).where(Product.id.in_(base_ids))
),
ProductLike.deleted_at.is_(None)
)
)).scalar_one() if user_id else 0
# Brand counts
brand_count_rows = await session.execute(
select(Product.brand, func.count(Product.id))
.where(Product.id.in_(base_ids),
Product.brand.is_not(None),
func.trim(Product.brand) != "",
Product.deleted_at.is_(None)
)
.group_by(Product.brand)
)
for brand_name, count in brand_count_rows:
brands_list.append({"name": brand_name, "count": count})
brands_list.sort(key=lambda x: (-x["count"], x["name"].lower()))
# Sticker counts
sticker_count_rows = await session.execute(
select(ProductSticker.name, func.count(ProductSticker.product_id))
.where(
ProductSticker.product_id.in_(base_ids),
ProductSticker.deleted_at.is_(None)
)
.group_by(ProductSticker.name)
)
for sticker_name, count in sticker_count_rows:
if sticker_name:
stickers_list.append({"name": sticker_name.strip().lower(), "count": count})
stickers_list.sort(key=lambda x: (-x["count"], x["name"]))
# Label counts
label_count_rows = await session.execute(
select(ProductLabel.name, func.count(ProductLabel.product_id))
.where(
ProductLabel.product_id.in_(base_ids),
ProductLabel.deleted_at.is_(None)
)
.group_by(ProductLabel.name)
)
for label_name, count in label_count_rows:
if label_name:
labels_list.append({"name": label_name, "count": count})
labels_list.sort(key=lambda x: (-x["count"], x["name"]))
# Search count
search_q = (search or "").strip().lower()
if search_q:
search_count = (await session.execute(
select(func.count(Product.id))
.where(
Product.id.in_(base_ids),
func.lower(Product.description_short).contains(search_q),
Product.deleted_at.is_(None)
)
)).scalars().one()
else:
search_count = len(base_ids)
return {
"brands": brands_list,
"stickers": stickers_list,
"labels": labels_list,
"liked_count": liked_count,
"search_count": search_count,
}
async def db_products(
session,
top_slug: str | None,
sub_slug: str | None,
selected_brands: Optional[List[str]] = None,
selected_stickers: Optional[List[str]] = None,
selected_labels: Optional[List[str]] = None,
page: int = 1,
search: Optional[str] = None,
sort: Optional[str] = None,
page_size: int = 20,
liked: Optional[bool] = None,
user_id: int = 0
) -> Dict:
return {
**(await db_products_nocounts(
session,
top_slug=top_slug,
sub_slug=sub_slug,
selected_brands=selected_brands,
selected_stickers=selected_stickers,
selected_labels=selected_labels,
page=page,
search=search,
sort=sort,
page_size=page_size,
liked=liked,
user_id=user_id
)),
**(await db_products_counts(
session,
top_slug=top_slug,
sub_slug=sub_slug,
search=search,
user_id=user_id
)),
}
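Both backends compute `total_pages` with the same ceiling-division-clamped-to-one idiom. Isolated, with an illustrative helper name:

```python
def total_pages(n_items: int, page_size: int = 20) -> int:
    # Ceiling division; an empty result set still yields one (empty) page,
    # matching the max(1, ...) clamp used in the listing queries.
    return max(1, (n_items + page_size - 1) // page_size)

print(total_pages(0), total_pages(20), total_pages(21))  # → 1 1 2
```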

bp/browse/services/nav.py Normal file

@@ -0,0 +1,163 @@
from __future__ import annotations
import time
import re
from typing import Dict, List, Tuple, Optional
from urllib.parse import urlparse, urljoin
from config import config
from . import db_backend as cb
from .blacklist.category import is_category_blocked
# ------------------ Caches ------------------
_nav_cache: Dict = {}
_nav_cache_ts: float = 0.0
_nav_ttl_seconds = 60 * 60 * 6 # 6 hours
def _now() -> float:
return time.time()
def extract_sub_slug(href: str, top_slug: str) -> Optional[str]:
p = urlparse(href)
parts = [x for x in (p.path or "").split("/") if x]
if len(parts) >= 2 and parts[0].lower() == top_slug.lower():
sub = parts[1]
if sub.lower().endswith((".html", ".htm")):
sub = re.sub(r"\.html?$", "", sub, flags=re.I)
return sub
return None
def group_by_category(slug_to_links: Dict[str, List[Tuple[str, str]]]) -> Dict[str, Dict]:
nav = {"cats": {}}
for label, slug in config()["categories"]["allow"].items():
top_href = urljoin(config()["base_url"], f"/{slug}")
subs = []
for text, href in slug_to_links.get(slug, []):
sub_slug = extract_sub_slug(href, slug)
if sub_slug:
subs.append({
"name": text,
"href": href,
"slug": sub_slug,
# no count here yet in this path
})
subs.sort(key=lambda x: x["name"].lower())
nav["cats"][label] = {"href": top_href, "slug": slug, "subs": subs}
nav = _apply_category_blacklist(nav)
return nav
async def get_nav(session) -> Dict[str, Dict]:
"""
Return navigation structure; annotate each sub with product counts.
Uses snapshot for offline behaviour.
"""
global _nav_cache, _nav_cache_ts
now_ts = _now()
# load from snapshot
nav = await cb.db_nav(session)
# inject counts for each subcategory (and for top-level too if you like)
for label, cat in (nav.get("cats") or {}).items():
top_slug = cat.get("slug")
if not top_slug:
continue
# Per-sub product counts could be injected here; currently a passthrough copy.
cat["subs"] = [dict(s) for s in cat.get("subs", [])]
_nav_cache = nav
_nav_cache_ts = now_ts
nav = _apply_category_blacklist(nav)
return nav
def category_context(top_slug: Optional[str], sub_slug: Optional[str], nav: Dict[str, Dict]):
"""Build template context for a category/subcategory page."""
def _order_subs_selected_first(subs, sub_slug: str | None):
"""Return subs with the selected subcategory (by slug) first."""
if not subs or not sub_slug:
return subs
sub = sub_slug.lower()
head = [s for s in subs if (s.get("slug") or "").lower() == sub]
tail = [s for s in subs if (s.get("slug") or "").lower() != sub]
return head + tail
REVERSE_CATEGORY = {v: k for k, v in config()["categories"]["allow"].items()}
label = REVERSE_CATEGORY.get(top_slug)
cat = nav["cats"].get(label) or {}
top_suma_href = cat.get("href") or urljoin(config()["base_url"], f"/{top_slug}")
top_local_href = top_slug or ""
# total products in this top-level category (all subs combined / top-level listing)
top_count = cat.get("count", 0)
subs = []
for s in cat.get("subs", []):
subs.append({
"name": s["name"],
"slug": s.get("slug"),
"local_href": f"{top_slug}/{s.get('slug')}",
"suma_href": s["href"],
"count": s.get("count", 0), # per-subcategory product count
})
return {
"category_label": label,
"top_slug": top_slug,
"sub_slug": sub_slug,
"top_suma_href": top_suma_href,
"top_local_href": top_local_href,
# expose total count for the parent category
"top_count": top_count,
# list of subcategories, each with its own count
"subs_local": _order_subs_selected_first(subs, sub_slug),
}
def _apply_category_blacklist(nav: Dict[str, Dict]) -> Dict[str, Dict]:
cats = nav.get("cats", {})
out = {"cats": {}}
for label, data in cats.items():
top = (data or {}).get("slug")
if not top or is_category_blocked(top):
continue
# filter subs
subs = []
for s in (data.get("subs") or []):
sub_slug = s.get("slug")
if sub_slug and not is_category_blocked(top, sub_slug):
subs.append(s)
# keep everything else (including counts)
out["cats"][label] = {**data, "subs": subs}
return out
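The sub-slug extraction above can be exercised standalone; this is a self-contained mirror of `extract_sub_slug` (example URLs are made up):

```python
import re
from urllib.parse import urlparse

def extract_sub_slug(href: str, top_slug: str):
    # "/<top>/<sub>[.html]" -> "sub" when the first segment matches top_slug.
    p = urlparse(href)
    parts = [x for x in (p.path or "").split("/") if x]
    if len(parts) >= 2 and parts[0].lower() == top_slug.lower():
        return re.sub(r"\.html?$", "", parts[1], flags=re.I)
    return None

print(extract_sub_slug("https://example.com/drinks/juices.html", "drinks"))  # → juices
print(extract_sub_slug("https://example.com/food/bread", "drinks"))          # → None
```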


@@ -0,0 +1,118 @@
# products.py
from __future__ import annotations
from typing import List, Optional
from urllib.parse import urlparse
from .state import KNOWN_PRODUCT_SLUGS
from .blacklist.category import is_category_blocked
from . import db_backend as cb
# NEW IMPORT:
from quart import g
async def products(
list_url: str,
selected_brands: Optional[List[str]] = None,
selected_stickers: Optional[List[str]] = None,
selected_labels: Optional[List[str]] = None,
page: int = 1,
search: Optional[str] = None,
sort: Optional[str] = None,
liked: Optional[bool] = None,
# NEW:
user_id: Optional[int] = None,
):
p = urlparse(list_url)
parts = [x for x in (p.path or "").split("/") if x]
top = parts[0] if parts else None
sub = parts[1] if len(parts) >= 2 else None
if is_category_blocked(top, sub):
return [], [], [], [], 1, 0, 0  # match the 7-tuple shape of the normal return
data = await cb.db_products(
g.s,
top,
sub,
selected_brands,
selected_stickers,
selected_labels,
page,
search,
sort,
liked=liked,
user_id=user_id if user_id is not None else (g.user.id if g.user else 0)
)
items = data.get("items", []) or []
brands = data.get("brands", []) or []
stickers = data.get("stickers", []) or []
labels = data.get("labels", []) or []
total_pages = int(data.get("total_pages", 1) or 1)
# Track known product slugs
for it in items:
try:
slug = it.get("slug")
if slug:
KNOWN_PRODUCT_SLUGS.add(slug)
except Exception:
pass
# Returns: items, brands, stickers, labels, total_pages, liked_count, search_count
return items, brands, stickers, labels, total_pages, data.get("liked_count"), data.get("search_count")
async def products_nocounts(
session,
list_url: str,
selected_brands: Optional[List[str]] = None,
selected_stickers: Optional[List[str]] = None,
selected_labels: Optional[List[str]] = None,
page: int = 1,
search: Optional[str] = None,
sort: Optional[str] = None,
liked: Optional[bool] = None,
# NEW:
user_id: Optional[int] = None,
):
p = urlparse(list_url)
parts = [x for x in (p.path or "").split("/") if x]
top = parts[0] if parts else None
sub = parts[1] if len(parts) >= 2 else None
if is_category_blocked(top, sub):
return [], 1  # match the (items, total_pages) shape of the normal return
data = await cb.db_products_nocounts(
session,
top,
sub,
selected_brands,
selected_stickers,
selected_labels,
page,
search,
sort,
liked=liked,
user_id=user_id if user_id is not None else (g.user.id if g.user else 0),
)
items = data.get("items", []) or []
total_pages = int(data.get("total_pages", 1) or 1)
# Track known product slugs
for it in items:
try:
slug = it.get("slug")
if slug:
KNOWN_PRODUCT_SLUGS.add(slug)
except Exception:
pass
# Returns: items, total_pages
return items, total_pages
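Both listing entry points derive the top/sub category slugs from the first two path segments of `list_url`. A minimal mirror of that parsing (helper name is illustrative):

```python
from urllib.parse import urlparse

def split_top_sub(list_url: str):
    # First path segment is the top-level category slug, second (if present)
    # the subcategory slug; empty paths yield (None, None).
    parts = [x for x in (urlparse(list_url).path or "").split("/") if x]
    return (parts[0] if parts else None, parts[1] if len(parts) >= 2 else None)

print(split_top_sub("https://example.com/drinks/juices"))  # → ('drinks', 'juices')
print(split_top_sub(""))                                   # → (None, None)
```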


@@ -0,0 +1,179 @@
from __future__ import annotations
from urllib.parse import urljoin
from quart import (
g,
request,
)
from config import config
from .products import products, products_nocounts
from .blacklist.product_details import is_blacklisted_heading
from utils import host_url
from sqlalchemy import select
from models import ProductLike
from ...market.filters.qs import decode
def _hx_fragment_request() -> bool:
return request.headers.get("HX-Request", "").lower() == "true"
async def _productInfo(top_slug=None, sub_slug=None):
"""
Shared query logic for home / category / subcategory pages.
Pulls filters from qs.decode(), queries products(), and orders brands/stickers/etc.
"""
q = decode()
page, search, sort = q.page, q.search, q.sort
selected_brands, selected_stickers, selected_labels = q.selected_brands, q.selected_stickers, q.selected_labels
liked = q.liked
if top_slug is not None and sub_slug is not None:
list_url = urljoin(config()["base_url"], f"/{top_slug}/{sub_slug}")
else:
if top_slug is not None:
list_url = top_slug
else:
list_url = ""
if not _hx_fragment_request() or page == 1:
items, brands, stickers, labels, total_pages, liked_count, search_count = await products(
list_url,
selected_brands=selected_brands,
selected_stickers=selected_stickers,
selected_labels=selected_labels,
page=page,
search=search,
sort=sort,
user_id=g.user.id if g.user else None,
liked=liked,
)
brands_ordered = _order_brands_selected_first(brands, selected_brands)
return {
"products": items,
"page": page,
"search": search,
"sort": sort,
"total_pages": int(total_pages or 1),
"brands": brands_ordered,
"selected_brands": selected_brands,
"stickers": stickers,
"selected_stickers": selected_stickers,
"labels": labels,
"selected_labels": selected_labels,
"liked": liked,
"liked_count": liked_count,
"search_count": search_count
}
else:
items, total_pages = await products_nocounts(
g.s,
list_url,
selected_brands=selected_brands,
selected_stickers=selected_stickers,
selected_labels=selected_labels,
page=page,
search=search,
sort=sort,
user_id=g.user.id if g.user else None,
liked=liked,
)
return {
"products": items,
"page": page,
"search": search,
"sort": sort,
"total_pages": int(total_pages or 1),
}
def _order_brands_selected_first(brands, selected):
"""Return brands with the selected brand(s) first."""
if not brands or not selected:
return brands
sel = [(s or "").strip() for s in selected]
head = [s for s in brands if (s.get("name") or "").strip() in sel]
tail = [s for s in brands if (s.get("name") or "").strip() not in sel]
return head + tail
def _order_stickers_selected_first(
stickers: list[dict], selected_stickers: list[str] | None
):
if not stickers or not selected_stickers:
return stickers
sel = [(s or "").strip().lower() for s in selected_stickers]
head = [s for s in stickers if (s.get("name") or "").strip().lower() in sel]
tail = [
s
for s in stickers
if (s.get("name") or "").strip().lower() not in sel
]
return head + tail
def _order_labels_selected_first(
labels: list[dict], selected_labels: list[str] | None
):
if not labels or not selected_labels:
return labels
sel = [(s or "").strip().lower() for s in selected_labels]
head = [s for s in labels if (s.get("name") or "").strip().lower() in sel]
tail = [
s
for s in labels
if (s.get("name") or "").strip().lower() not in sel
]
return head + tail
def _massage_product(d):
"""
Normalise the product dict for templates:
- inject APP_ROOT into HTML
- drop blacklisted sections
"""
massaged = {
**d,
"description_html": (d.get("description_html") or "").replace(
"[**__APP_ROOT__**]", g.root
),
"sections": [
{
**section,
"html": section["html"].replace(
"[**__APP_ROOT__**]", g.root
),
}
for section in d["sections"]
if not is_blacklisted_heading(section["title"])
],
}
return massaged
# Re-export from canonical shared location
from shared.http_utils import vary as _vary, current_url_without_page as _current_url_without_page
async def _is_liked(user_id: int | None, slug: str) -> bool:
"""
Check if this user has liked this product.
"""
if not user_id:
return False
# ProductLike has a composite primary key (user_id, product_slug),
# so filtering on both columns identifies at most one live row.
row = await g.s.execute(
select(ProductLike).where(
ProductLike.user_id == user_id,
ProductLike.product_slug == slug,
ProductLike.deleted_at.is_(None),
)
)
return row.scalar_one_or_none() is not None
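`_massage_product` above rewrites the `[**__APP_ROOT__**]` placeholder and drops blacklisted sections. A self-contained sketch of that shape, with a hypothetical blocked-heading set standing in for `is_blacklisted_heading`:

```python
# Hypothetical blocked headings; the real check lives in blacklist.product_details.
BLOCKED = {"storage"}

def massage(d: dict, root: str) -> dict:
    # Substitute the app-root placeholder in each surviving section's HTML.
    return {
        **d,
        "sections": [
            {**s, "html": s["html"].replace("[**__APP_ROOT__**]", root)}
            for s in d["sections"]
            if s["title"].lower() not in BLOCKED
        ],
    }

doc = {"sections": [
    {"title": "Details", "html": '<img src="[**__APP_ROOT__**]/x.png">'},
    {"title": "Storage", "html": "<p>cool, dry place</p>"},
]}
out = massage(doc, "/market")
print(len(out["sections"]), out["sections"][0]["html"])
# → 1 <img src="/market/x.png">
```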


@@ -0,0 +1,24 @@
import re
from urllib.parse import urljoin, urlparse
from config import config
def product_slug_from_href(href: str) -> str:
p = urlparse(href)
parts = [x for x in p.path.split("/") if x]
if not parts:
return ""
last = parts[-1]
if last.endswith(".html"):
last = last[:-5]
elif last.endswith(".htm"):
last = last[:-4]
last = re.sub(r"-(html|htm)+$", "", last, flags=re.I)
return f"{last}-html"
def canonical_html_slug(slug: str) -> str:
base = re.sub(r"-(html|htm)+$", "", slug, flags=re.I)
return f"{base}-html"
def suma_href_from_html_slug(slug: str) -> str:
canon = canonical_html_slug(slug)
return urljoin(config()["base_url"], f"/{canon}.html")
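The canonicalisation above is idempotent: any trailing run of `-html`/`-htm` is stripped before a single `-html` suffix is re-appended. Demonstrated in isolation:

```python
import re

def canonical_html_slug(slug: str) -> str:
    # Strip any trailing -html/-htm run, then append exactly one -html suffix,
    # so applying the function twice gives the same result.
    base = re.sub(r"-(html|htm)+$", "", slug, flags=re.I)
    return f"{base}-html"

print(canonical_html_slug("apple-juice"))       # → apple-juice-html
print(canonical_html_slug("apple-juice-html"))  # → apple-juice-html
```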


@@ -0,0 +1,21 @@
from typing import Dict, Tuple, List
import time
_nav_cache: dict = {}
_nav_cache_ts: float = 0.0
_nav_ttl_seconds = 60 * 60 * 6
_detail_cache: Dict[str, Dict] = {}
_detail_cache_ts: Dict[str, float] = {}
_detail_ttl_seconds = 60 * 60 * 6
KNOWN_PRODUCT_SLUGS: set[str] = set()
_listing_variant_cache: Dict[str, Tuple[str, float]] = {}
_listing_variant_ttl = 60 * 60 * 6
_listing_page_cache: Dict[str, Tuple[Tuple[List[Dict], int], float]] = {}
_listing_page_ttl = 60 * 30
def now() -> float:
return time.time()
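A read-through TTL check over module-level dicts like these looks like the following sketch (the `get_cached`/`set_cached` names are illustrative; the real lookups live in the nav and listing services):

```python
import time
from typing import Any, Dict, Optional, Tuple

_cache: Dict[str, Tuple[Any, float]] = {}  # key -> (value, stored_at)
_ttl_seconds = 60 * 60 * 6

def set_cached(key: str, value: Any) -> None:
    _cache[key] = (value, time.time())

def get_cached(key: str) -> Optional[Any]:
    hit = _cache.get(key)
    if hit is None:
        return None
    value, stored_at = hit
    if time.time() - stored_at > _ttl_seconds:
        # expired: evict and report a miss
        del _cache[key]
        return None
    return value

set_cached("nav", {"cats": {}})
```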

7
bp/market/__init__.py Normal file
View File

@@ -0,0 +1,7 @@
from __future__ import annotations
# Re-export the blueprint factory; the Blueprint itself is created
# inside routes.register(), not at package import time.
from .routes import register  # noqa: F401

View File

28
bp/market/admin/routes.py Normal file
View File

@@ -0,0 +1,28 @@
from __future__ import annotations
from quart import (
render_template, make_response, Blueprint
)
from suma_browser.app.authz import require_admin
def register():
bp = Blueprint("admin", __name__, url_prefix='/admin')
# ---------- Pages ----------
@bp.get("/")
@require_admin
async def admin():
from suma_browser.app.utils.htmx import is_htmx_request
# Determine which template to use based on request type
if not is_htmx_request():
# Normal browser request: full page with layout
html = await render_template("_types/market/admin/index.html")
else:
html = await render_template("_types/market/admin/_oob_elements.html")
return await make_response(html)
return bp

View File

101
bp/market/filters/qs.py Normal file
View File

@@ -0,0 +1,101 @@
from quart import request
from typing import Iterable, Optional, Union
from suma_browser.app.filters.qs_base import (
KEEP, _norm, make_filter_set, build_qs,
)
from suma_browser.app.filters.query_types import MarketQuery
def decode() -> MarketQuery:
try:
page = int(request.args.get("page", 1))
except (TypeError, ValueError):
page = 1
search = request.args.get("search")
sort = request.args.get("sort")
liked = request.args.get("liked")
selected_brands = tuple(s.strip() for s in request.args.getlist("brand") if s.strip())
selected_stickers = tuple(s.strip().lower() for s in request.args.getlist("sticker") if s.strip())
selected_labels = tuple(s.strip().lower() for s in request.args.getlist("label") if s.strip())
return MarketQuery(page, search, sort, selected_brands, selected_stickers, selected_labels, liked)
def makeqs_factory():
"""
Build a makeqs(...) that starts from the current filters + page.
Auto-resets page to 1 when filters change unless you pass page explicitly.
"""
q = decode()
base_stickers = [s for s in q.selected_stickers if (s or "").strip()]
base_labels = [s for s in q.selected_labels if (s or "").strip()]
base_brands = [s for s in q.selected_brands if (s or "").strip()]
base_search = q.search or None
base_liked = q.liked or None
base_sort = q.sort or None
base_page = int(q.page or 1)
def makeqs(
*,
clear_filters: bool = False,
add_sticker: Union[str, Iterable[str], None] = None,
remove_sticker: Union[str, Iterable[str], None] = None,
add_label: Union[str, Iterable[str], None] = None,
remove_label: Union[str, Iterable[str], None] = None,
add_brand: Union[str, Iterable[str], None] = None,
remove_brand: Union[str, Iterable[str], None] = None,
search: Union[str, None, object] = KEEP,
sort: Union[str, None, object] = KEEP,
page: Union[int, None, object] = None,
extra: Optional[Iterable[tuple]] = None,
leading_q: bool = True,
liked: Union[bool, None, object] = KEEP,
) -> str:
stickers = make_filter_set(base_stickers, add_sticker, remove_sticker, clear_filters)
labels = make_filter_set(base_labels, add_label, remove_label, clear_filters)
brands = make_filter_set(base_brands, add_brand, remove_brand, clear_filters)
final_search = None if clear_filters else base_search if search is KEEP else ((search or "").strip() or None)
final_sort = base_sort if sort is KEEP else (sort or None)
final_liked = None if clear_filters else base_liked if liked is KEEP else liked
# Did filters change?
filters_changed = (
set(map(_norm, stickers)) != set(map(_norm, base_stickers))
or set(map(_norm, labels)) != set(map(_norm, base_labels))
or set(map(_norm, brands)) != set(map(_norm, base_brands))
or final_search != base_search
or final_sort != base_sort
or final_liked != base_liked
)
# Page logic
if page is KEEP:
final_page = 1 if filters_changed else base_page
else:
final_page = page
# Build params
params = []
for s in stickers:
params.append(("sticker", s))
for s in labels:
params.append(("label", s))
for s in brands:
params.append(("brand", s))
if final_search:
params.append(("search", final_search))
if final_liked is not None:
params.append(("liked", final_liked))
if final_sort:
params.append(("sort", final_sort))
if final_page is not None:
params.append(("page", str(final_page)))
if extra:
for k, v in extra:
if v is not None:
params.append((k, str(v)))
return build_qs(params, leading_q=leading_q)
return makeqs
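The real `build_qs` lives in `suma_browser.app.filters.qs_base`; a minimal stand-in showing the query-string shape `makeqs` produces (parameter values here are illustrative):

```python
from urllib.parse import urlencode

def build_qs(params, leading_q=True):
    # illustrative stand-in for the shared build_qs helper:
    # encode (key, value) pairs, optionally with a leading "?"
    qs = urlencode(params)
    if not qs:
        return ""
    return f"?{qs}" if leading_q else qs

qs = build_qs([("sticker", "vegan"), ("brand", "Suma"), ("page", "2")])
```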

44
bp/market/routes.py Normal file
View File

@@ -0,0 +1,44 @@
from __future__ import annotations
from quart import Blueprint, g, render_template, make_response, url_for
from ..browse.routes import register as register_browse_bp
from .filters.qs import makeqs_factory
from ..browse.services.nav import get_nav
from ..api.routes import products_api
from .admin.routes import register as register_admin
def register(url_prefix, title):
bp = Blueprint("market", __name__, url_prefix)
@bp.before_request
def route():
g.makeqs_factory = makeqs_factory
@bp.context_processor
async def inject_root():
return {
"coop_title": title,
"categories": (await get_nav(g.s))["cats"],
"qs": makeqs_factory()(),
}
bp.register_blueprint(
register_browse_bp(),
)
bp.register_blueprint(
products_api,
)
bp.register_blueprint(
register_admin(),
)
return bp

248
bp/product/routes.py Normal file
View File

@@ -0,0 +1,248 @@
from __future__ import annotations
from quart import (
g,
Blueprint,
abort,
redirect,
render_template,
make_response,
request,
url_for,
)
from sqlalchemy import select, func, update
from models.market import Product, ProductLike
from ..browse.services.slugs import canonical_html_slug
from ..browse.services.blacklist.product import is_product_blocked
from ..browse.services import db_backend as cb
from ..browse.services import _massage_product
from utils import host_url
from suma_browser.app.redis_cacher import cache_page, clear_cache
from ..cart.services import total
from .services.product_operations import toggle_product_like, massage_full_product
def register():
bp = Blueprint("product", __name__, url_prefix="/product/<slug>")
@bp.url_value_preprocessor
def pull_slug(endpoint, values):
g.product_slug = values.get("slug")
# ─────────────────────────────────────────────────────────────
# BEFORE REQUEST: Slug or numeric ID resolver
# ─────────────────────────────────────────────────────────────
@bp.before_request
async def resolve_product():
raw_slug = getattr(g, "product_slug", None)
if raw_slug is None:
return
# 1. If slug is INT → load product by ID
if raw_slug.isdigit():
product_id = int(raw_slug)
product = await cb.db_product_full_id(
g.s, product_id, user_id=g.user.id if g.user else 0
)
if not product:
abort(404)
# If product is deleted → SHOW as-is
if product["deleted_at"]:
d = product
g.item_data = {"d": d, "slug": product["slug"], "liked": False}
return
# Not deleted → redirect to canonical slug
canon = canonical_html_slug(product["slug"])
return redirect(
host_url(url_for("market.browse.product.product_detail", slug=canon))
)
# 2. Normal slug-based behaviour
if is_product_blocked(raw_slug):
abort(404)
canon = canonical_html_slug(raw_slug)
if canon != raw_slug:
return redirect(
host_url(url_for("product.product_detail", slug=canon))
)
# hydrate full product
d = await cb.db_product_full(
g.s, canon, user_id=g.user.id if g.user else 0
)
if not d:
abort(404)
g.item_data = {"d": d, "slug": canon, "liked": d["is_liked"]}
@bp.context_processor
def context():
item_data = getattr(g, "item_data", None)
if item_data:
return {
**item_data,
}
else:
return {}
# ─────────────────────────────────────────────────────────────
# RENDER PRODUCT
# ─────────────────────────────────────────────────────────────
@bp.get("/")
@cache_page(tag="browse")
async def product_detail(slug: str):
from suma_browser.app.utils.htmx import is_htmx_request
# Determine which template to use based on request type
if not is_htmx_request():
# Normal browser request: full page with layout
html = await render_template("_types/product/index.html")
else:
# HTMX request: main panel + OOB elements
html = await render_template("_types/product/_oob_elements.html")
return html
@bp.post("/like/toggle/")
@clear_cache(tag="browse", tag_scope="user")
async def like_toggle(slug):
# Use slug from URL parameter (set by url_prefix="/product/<slug>")
product_slug = slug
if not g.user:
html = await render_template(
"_types/browse/like/button.html",
slug=product_slug,
liked=False,
)
resp = make_response(html, 403)
return resp
user_id = g.user.id
liked, error = await toggle_product_like(g.s, user_id, product_slug)
if error:
resp = make_response(error, 404)
return resp
html = await render_template(
"_types/browse/like/button.html",
slug=product_slug,
liked=liked,
)
return html
@bp.get("/admin/")
async def admin(slug: str):
from suma_browser.app.utils.htmx import is_htmx_request
if not is_htmx_request():
# Normal browser request: full page with layout
html = await render_template("_types/product/admin/index.html")
else:
# HTMX request: main panel + OOB elements
html = await render_template("_types/product/admin/_oob_elements.html")
return await make_response(html)
from suma_browser.app.bp.cart.services.identity import current_cart_identity
from models.market import CartItem
from quart import request, url_for
@bp.post("/cart/")
@clear_cache(tag="browse", tag_scope="user")
async def cart(slug: str):
# look up the product; deleted products are excluded here
product = await g.s.scalar(
select(Product).where(
Product.slug == slug,
Product.deleted_at.is_(None),
)
)
if not product:
return await make_response("Product not found", 404)
product_id = product.id
# read `count` from the body (JSON or form), defaulting to 1
count = 1
try:
if request.is_json:
data = await request.get_json()
if data is not None and "count" in data:
count = int(data["count"])
else:
form = await request.form
if "count" in form:
count = int(form["count"])
except (ValueError, TypeError):
# if parsing fails, just fall back to 1
count = 1
ident = current_cart_identity()
ci = next(
(item for item in g.cart if item.product_id == product_id),
None,
)
# set quantity based on `count`
if ci:
if count > 0:
ci.quantity = count
else:
# count <= 0 → remove from cart entirely
ci.quantity = 0
g.cart.remove(ci)
await g.s.delete(ci)
else:
if count > 0:
ci = CartItem(
user_id=ident["user_id"],
session_id=ident["session_id"],
product_id=product.id,
product=product,
quantity=count,
)
g.cart.append(ci)
g.s.add(ci)
# if count <= 0 and no existing item, do nothing
# no explicit commit; your session middleware should handle it
# htmx support (optional)
if request.headers.get("HX-Request") == "true":
# You can return a small fragment or mini-cart here
return await render_template(
"_types/product/_added.html",
cart=g.cart,
item=ci,
total=total,
)
# normal POST: go to cart page
return redirect(url_for("cart.view_cart"))
return bp

View File

@@ -0,0 +1,3 @@
from .product_operations import toggle_product_like, massage_full_product
__all__ = ["toggle_product_like", "massage_full_product"]

View File

@@ -0,0 +1,95 @@
from __future__ import annotations
from typing import Optional
from sqlalchemy import select
from sqlalchemy.ext.asyncio import AsyncSession
from models.market import Product, ProductLike
def massage_full_product(product: Product) -> dict:
"""
Convert a Product ORM model to a dictionary with all fields.
Used for rendering product detail pages.
"""
from suma_browser.app.bp.browse.services import _massage_product
gallery = []
if product.image:
gallery.append(product.image)
d = {
"id": product.id,
"slug": product.slug,
"title": product.title,
"brand": product.brand,
"image": product.image,
"description_short": product.description_short,
"description_html": product.description_html or "",
"suma_href": product.suma_href,
"rrp": float(product.rrp) if product.rrp else None,
"special_price": float(product.special_price) if product.special_price else None,
"regular_price": float(product.regular_price) if product.regular_price else None,
"images": gallery or [img.url for img in product.images],
"all_image_urls": gallery or [img.url for img in product.images],
"sections": [{"title": s.title, "html": s.html} for s in product.sections],
"stickers": [s.name.lower() for s in product.stickers],
"labels": [l.name for l in product.labels],
"nutrition": [{"key": n.key, "value": n.value, "unit": n.unit} for n in product.nutrition],
"allergens": [{"name": a.name, "contains": a.contains} for a in product.allergens],
"is_liked": False,
}
return _massage_product(d)
async def toggle_product_like(
session: AsyncSession,
user_id: int,
product_slug: str,
) -> tuple[bool, Optional[str]]:
"""
Toggle a product like for a given user using soft deletes.
Returns (liked_state, error_message).
- If error_message is not None, an error occurred.
- liked_state indicates whether product is now liked (True) or unliked (False).
"""
from sqlalchemy import func, update
# Get product_id from slug
product_id = await session.scalar(
select(Product.id).where(Product.slug == product_slug, Product.deleted_at.is_(None))
)
if not product_id:
return False, "Product not found"
# Check if like exists (not deleted)
existing = await session.scalar(
select(ProductLike).where(
ProductLike.user_id == user_id,
ProductLike.product_slug == product_slug,
ProductLike.deleted_at.is_(None),
)
)
if existing:
# Unlike: soft delete the like
await session.execute(
update(ProductLike)
.where(
ProductLike.user_id == user_id,
ProductLike.product_slug == product_slug,
ProductLike.deleted_at.is_(None),
)
.values(deleted_at=func.now())
)
return False, None
else:
# Like: add a new like
new_like = ProductLike(
user_id=user_id,
product_slug=product_slug,
)
session.add(new_like)
return True, None
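The same soft-delete toggle can be sketched with plain sqlite3 (the table layout is an illustrative stand-in for the `ProductLike` model; the real code goes through an async SQLAlchemy session):

```python
import sqlite3

con = sqlite3.connect(":memory:")
con.execute(
    "CREATE TABLE product_like (user_id INT, product_slug TEXT, deleted_at TEXT)"
)

def toggle_like(user_id: int, slug: str) -> bool:
    """Return the new liked state, using soft deletes like the code above."""
    row = con.execute(
        "SELECT rowid FROM product_like "
        "WHERE user_id=? AND product_slug=? AND deleted_at IS NULL",
        (user_id, slug),
    ).fetchone()
    if row:
        # unlike: soft delete the live row
        con.execute(
            "UPDATE product_like SET deleted_at=datetime('now') WHERE rowid=?",
            (row[0],),
        )
        return False
    # like: insert a fresh live row
    con.execute("INSERT INTO product_like VALUES (?, ?, NULL)", (user_id, slug))
    return True
```

Because unlikes only null out the live row, repeated toggles accumulate soft-deleted history rows rather than flipping a single record.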

31
entrypoint.sh Normal file
View File

@@ -0,0 +1,31 @@
#!/usr/bin/env bash
set -euo pipefail
# Optional: wait for Postgres to be reachable
if [[ -n "${DATABASE_HOST:-}" && -n "${DATABASE_PORT:-}" ]]; then
echo "Waiting for Postgres at ${DATABASE_HOST}:${DATABASE_PORT}..."
for i in {1..60}; do
(echo > /dev/tcp/${DATABASE_HOST}/${DATABASE_PORT}) >/dev/null 2>&1 && break || true
sleep 1
done
fi
# Run DB migrations (uses alembic.ini/env.py to resolve the DB URL)
echo "Running Alembic migrations..."
alembic upgrade head
# Clear Redis page cache on deploy
if [[ -n "${REDIS_URL:-}" && "${REDIS_URL}" != "no" ]]; then
echo "Flushing Redis cache..."
python3 -c "
import redis, os
r = redis.from_url(os.environ['REDIS_URL'])
r.flushall()
print('Redis cache cleared.')
" || echo "Redis flush failed (non-fatal), continuing..."
fi
# Start the app
# APP_MODULE can be overridden per-service (e.g. apps.market.app:app)
echo "Starting Hypercorn (${APP_MODULE:-suma_browser.app.app:app})..."
PYTHONUNBUFFERED=1 exec hypercorn "${APP_MODULE:-suma_browser.app.app:app}" --bind 0.0.0.0:${PORT:-8000}
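The `/dev/tcp` probe above is bash-specific; an equivalent stdlib-only Python sketch of the same wait-for-Postgres loop (host and port would come from the same env vars):

```python
import socket
import time

def wait_for_port(host: str, port: int, timeout: float = 5.0) -> bool:
    """Poll until a TCP connection to host:port succeeds, or the timeout expires."""
    deadline = time.monotonic() + timeout
    while time.monotonic() < deadline:
        try:
            with socket.create_connection((host, port), timeout=1.0):
                return True
        except OSError:
            time.sleep(0.2)
    return False

# demo: listen on an ephemeral local port and wait for it
srv = socket.socket()
srv.bind(("127.0.0.1", 0))
srv.listen(1)
port = srv.getsockname()[1]
ready = wait_for_port("127.0.0.1", port, timeout=2.0)
srv.close()
```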

6
scrape-test.sh Normal file
View File

@@ -0,0 +1,6 @@
. .env
source venv/bin/activate
rm -rf _debug/*
python test_scrape_detail.py --out ./_debug --slug sum-saag-suma-aloo-saag-12-x-400g-vf270-2-html
#git -C _debug status
#git -C _debug diff

5
scrape.sh Normal file
View File

@@ -0,0 +1,5 @@
. .env
echo sumauser: $SUMA_USER
source .venv/bin/activate
python scrape_to_snapshot.py --out ./_snapshot --max-pages 50 --max-products 200000 --concurrency 8

0
scrape/__init__.py Normal file
View File

View File

@@ -0,0 +1 @@
from .build_snapshot import build_snapshot

View File

@@ -0,0 +1,104 @@
#!/usr/bin/env python3
from __future__ import annotations
import os
from typing import Dict, Set
from ..http_client import configure_cookies
from ..get_auth import login
from config import config
from utils import log
# DB: persistence helpers
from .tools import (
_resolve_sub_redirects,
valid_subs,
candidate_subs,
rewrite_nav,
capture_product_slugs,
fetch_and_upsert_products,
)
from ..nav import nav_scrape
# ------------------------ core ------------------------
async def build_snapshot(
concurrency: int,
user: str,
password: str,
save_nav,
capture_listing,
upsert_product,
log_product_result,
save_subcategory_redirects,
save_link_reports = None,
) -> None:
# NOTE: listings are iterated in-memory; JSON snapshot files are no longer written.
# Make project importable
import sys
sys.path.insert(0, os.path.abspath("."))
cookies = await login(username=user, password=password)
await configure_cookies(cookies)
# log cookie names only; never print cookie values (they are session secrets)
for k in dict(cookies):
log(f"logged in with cookie: {k}")
# 1) NAV
log("Fetching nav…")
nav = await nav_scrape()
# Build valid subs per top from nav
valid_subs_by_top: Dict[str, Set[str]] = valid_subs(nav)
# Resolve redirects for all subs in nav first
nav_sub_candidates = candidate_subs(nav)
nav_redirects = await _resolve_sub_redirects(
base_url=config()["base_url"],
candidates=nav_sub_candidates,
allowed_tops=set(config()["categories"]["allow"].values()),
valid_subs_by_top=valid_subs_by_top,
)
rewrite_nav(nav, nav_redirects)
# DB: save nav
await save_nav(nav)
product_slugs: Set[str] = await capture_product_slugs(
nav,
capture_listing
)
unknown_sub_paths: Set[str] = set()
# 3) PRODUCTS (fetch details)
await fetch_and_upsert_products(
upsert_product,
log_product_result,
save_link_reports,
concurrency,
product_slugs,
valid_subs_by_top,
unknown_sub_paths
)
# Subcategory redirects from HTML
log("Resolving subcategory redirects…")
html_redirects = await _resolve_sub_redirects(
base_url=config()["base_url"],
candidates=unknown_sub_paths,
allowed_tops=set(config()["categories"]["allow"].values()),
valid_subs_by_top=valid_subs_by_top,
)
sub_redirects: Dict[str, str] = dict(nav_redirects)
sub_redirects.update(html_redirects)
# DB: persist redirects
await save_subcategory_redirects(sub_redirects)
log("Snapshot build complete (to Postgres).")

View File

@@ -0,0 +1 @@
APP_ROOT_PLACEHOLDER = "[**__APP_ROOT__**]"

View File

@@ -0,0 +1 @@

View File

@@ -0,0 +1,6 @@
def _anchor_text(a) -> str:
try:
txt = " ".join((a.get_text(" ") or "").split())
return txt[:200]
except Exception:
return ""

View File

@@ -0,0 +1,16 @@
from bs4 import BeautifulSoup
from typing import List, Optional
def _collect_html_img_srcs(html: Optional[str]) -> List[str]:
urls: List[str] = []
if not html:
return urls
try:
soup = BeautifulSoup(html, "lxml")
for img in soup.find_all("img"):
src = img.get("src")
if src:
urls.append(src)
except Exception:
pass
return urls

View File

@@ -0,0 +1,14 @@
from typing import Iterable, List, Set
def _dedupe_preserve_order(urls: Iterable[str]) -> List[str]:
seen: Set[str] = set()
out: List[str] = []
for u in urls:
if not u or not isinstance(u, str):
continue
if u in seen:
continue
seen.add(u)
out.append(u)
return out
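Unlike `set()`, this helper keeps first-seen order, which matters for gallery image ordering; a quick usage sketch:

```python
def dedupe_preserve_order(urls):
    # same logic as _dedupe_preserve_order above
    seen, out = set(), []
    for u in urls:
        if not u or not isinstance(u, str):
            continue
        if u in seen:
            continue
        seen.add(u)
        out.append(u)
    return out

result = dedupe_preserve_order(["/a.jpg", "/b.jpg", "/a.jpg", "", "/c.jpg"])
```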

View File

@@ -0,0 +1,32 @@
from typing import Dict, Optional, Tuple
_CF_TOKENS = (
"One moment, please...",
"Please wait while your request is being verified",
"/cdn-cgi/challenge-platform/",
"rocket-loader.min.js",
)
def _looks_like_cf_html(html: Optional[str]) -> Tuple[bool, Optional[str]]:
if not html:
return False, None
for tok in _CF_TOKENS:
if tok in html:
return True, tok
return False, None
def _product_dict_is_cf(d: Dict) -> Tuple[bool, Optional[str]]:
title = (d.get("title") or "").strip()
if title.lower() == "one moment, please...":
return True, "One moment, please..."
ok, tok = _looks_like_cf_html(d.get("description_html"))
if ok:
return True, tok
for sec in d.get("sections") or []:
if isinstance(sec, dict) and sec.get("html"):
ok2, tok2 = _looks_like_cf_html(sec["html"])
if ok2:
return True, tok2
if not d.get("images") and not d.get("description_html") and not d.get("sections"):
return True, "all_empty_heuristic"
return False, None
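A condensed, self-contained version of the detector, exercised on sample dicts (the sample values are illustrative):

```python
from typing import Dict, Optional, Tuple

CF_TOKENS = ("One moment, please...", "/cdn-cgi/challenge-platform/")

def product_dict_is_cf(d: Dict) -> Tuple[bool, Optional[str]]:
    # condensed sketch of the detector above: title check, token scan,
    # then the all-empty heuristic
    if (d.get("title") or "").strip().lower() == "one moment, please...":
        return True, "One moment, please..."
    html = d.get("description_html") or ""
    for tok in CF_TOKENS:
        if tok in html:
            return True, tok
    if not d.get("images") and not d.get("description_html") and not d.get("sections"):
        return True, "all_empty_heuristic"
    return False, None
```

The all-empty heuristic deliberately treats a product with no images, description, or sections as a failed fetch rather than a real listing.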

View File

@@ -0,0 +1,34 @@
from typing import Dict, Set
from urllib.parse import urlparse, urljoin
import httpx
async def _resolve_sub_redirects(
base_url: str,
candidates: Set[str],
allowed_tops: Set[str],
valid_subs_by_top: Dict[str, Set[str]],
) -> Dict[str, str]:
mapping: Dict[str, str] = {}
if not candidates:
return mapping
timeout = httpx.Timeout(20.0, connect=10.0)
async with httpx.AsyncClient(follow_redirects=True, timeout=timeout, http2=True) as client:
for path in sorted(candidates):
try:
url = urljoin(base_url, path)
r = await client.get(url)
final = str(r.url)
p = urlparse(final)
parts = [x for x in (p.path or "").split("/") if x]
if len(parts) >= 2:
top_new = parts[0].lower()
sub_new = parts[1].lower().removesuffix(".html").removesuffix(".htm")
if top_new in allowed_tops:
new_path = f"/{top_new}/{sub_new}"
if new_path != path:
mapping[path] = new_path
valid_subs_by_top.setdefault(top_new, set()).add(sub_new)
except Exception:
continue
return mapping

View File

@@ -0,0 +1,100 @@
from typing import Dict, List, Optional, Set
from bs4 import BeautifulSoup
from urllib.parse import urlparse, urljoin
from ._anchor_text import _anchor_text
from suma_browser.app.bp.browse.services.slugs import product_slug_from_href
from .APP_ROOT_PLACEHOLDER import APP_ROOT_PLACEHOLDER
def _rewrite_links_fragment(
html: Optional[str],
base_url: str,
known_slugs: Set[str],
category_allow_values: Set[str],
valid_subs_by_top: Dict[str, Set[str]],
current_product_slug: str,
link_errors: List[Dict],
link_externals: List[Dict],
unknown_sub_paths: Set[str],
) -> str:
if not html:
return ""
soup = BeautifulSoup(html, "lxml")
base_host = urlparse(base_url).netloc
for a in soup.find_all("a", href=True):
raw = (a.get("href") or "").strip()
if not raw:
continue
low = raw.lower()
if low.startswith(("mailto:", "tel:", "javascript:", "data:")) or low.startswith("#"):
continue
abs_href = urljoin(base_url, raw)
p = urlparse(abs_href)
if not p.scheme or not p.netloc:
continue
if p.netloc != base_host:
link_externals.append({
"product": current_product_slug,
"href": abs_href,
"text": _anchor_text(a),
"host": p.netloc,
})
continue
parts = [x for x in (p.path or "").split("/") if x]
if not parts:
continue
last = parts[-1].lower()
if last.endswith((".html", ".htm")):
target_slug = product_slug_from_href(abs_href)
if target_slug and target_slug in known_slugs:
a["href"] = f"{APP_ROOT_PLACEHOLDER}/product/{target_slug}"
else:
link_errors.append({
"product": current_product_slug,
"href": abs_href,
"text": _anchor_text(a),
"top": None,
"sub": None,
"target_slug": target_slug or None,
"type": "suma_product_unknown",
})
continue
top = parts[0].lower()
if top in category_allow_values:
if len(parts) == 1:
a["href"] = f"{APP_ROOT_PLACEHOLDER}/{top}"
else:
sub = parts[1]
if sub.lower().endswith((".html", ".htm")):
sub = sub.rsplit(".", 1)[0]
if sub in (valid_subs_by_top.get(top) or set()):
a["href"] = f"{APP_ROOT_PLACEHOLDER}/{top}/{sub}"
else:
unknown_path = f"/{top}/{sub}"
unknown_sub_paths.add(unknown_path)
a["href"] = f"{APP_ROOT_PLACEHOLDER}{unknown_path}"
link_errors.append({
"product": current_product_slug,
"href": abs_href,
"text": _anchor_text(a),
"top": top,
"sub": sub,
"target_slug": None,
"type": "suma_category_invalid_sub_pending",
})
else:
link_errors.append({
"product": current_product_slug,
"href": abs_href,
"text": _anchor_text(a),
"top": top,
"sub": parts[1] if len(parts) > 1 else None,
"target_slug": None,
"type": "suma_other",
})
for t in soup.find_all(["html", "body"]):
t.unwrap()
return "".join(str(c) for c in soup.contents).strip()

View File

@@ -0,0 +1,14 @@
from typing import Dict, Set
def candidate_subs(nav: Dict[str, Dict]) -> Set[str]:
nav_sub_candidates: Set[str] = set()
for label, data in (nav.get("cats") or {}).items():
top_slug = (data or {}).get("slug")
if not top_slug:
continue
for s in (data.get("subs") or []):
sub_slug = (s.get("slug") or "").strip()
if sub_slug:
nav_sub_candidates.add(f"/{top_slug}/{sub_slug}")
return nav_sub_candidates

View File

@@ -0,0 +1,18 @@
from urllib.parse import urljoin
from config import config
from utils import log
from ...listings import scrape_products
async def capture_category(
slug: str,
):
list_url = urljoin(config()["base_url"], f"/{slug}")
log(f"[{slug}] page 1…")
items, total_pages = await scrape_products(list_url, page=1)
pmax = int(total_pages or 1)
for p in range(2, pmax + 1):
log(f"[{slug}] page {p}")
items_p, _tp = await scrape_products(list_url, page=p)
items.extend(items_p)
return (list_url, items, total_pages)

View File

@@ -0,0 +1,25 @@
from typing import Dict, Set
from .capture_category import capture_category
from .capture_sub import capture_sub
from config import config
async def capture_product_slugs(
nav: Dict[str, Dict],
capture_listing,
):
product_slugs: Set[str] = set()
for label, slug in config()["categories"]["allow"].items():
lpars = await capture_category( slug)
await capture_listing(*lpars)
(_, items, __) = lpars
for slug_ in items:
product_slugs.add(slug_)
for sub in (nav["cats"].get(label, {}).get("subs", []) or []):
lpars = await capture_sub(sub, slug)
await capture_listing(*lpars)
(_, items, __) = lpars
for slug_ in items:
product_slugs.add(slug_)
return product_slugs

View File

@@ -0,0 +1,22 @@
from urllib.parse import urljoin
from config import config
from utils import log
from ...listings import scrape_products
async def capture_sub(
sub,
slug,
):
sub_slug = sub.get("slug")
if not sub_slug:
return
sub_url = urljoin(config()["base_url"], f"/{slug}/{sub_slug}")
log(f"[{slug}/{sub_slug}] page 1…")
items_s, total_pages_s = await scrape_products(sub_url, page=1)
spmax = int(total_pages_s or 1)
for p in range(2, spmax + 1):
log(f"[{slug}/{sub_slug}] page {p}")
items_ps, _ = await scrape_products(sub_url, page=p)
items_s.extend(items_ps)
return (sub_url, items_s, total_pages_s)

View File

@@ -0,0 +1,106 @@
import asyncio
from typing import List
import httpx
from ...html_utils import to_fragment
from suma_browser.app.bp.browse.services.slugs import suma_href_from_html_slug
from config import config
from utils import log
# DB: persistence helpers
from ...product.product_detail import scrape_product_detail
from ._product_dict_is_cf import _product_dict_is_cf
from ._rewrite_links_fragment import _rewrite_links_fragment
from ._dedupe_preserve_order import _dedupe_preserve_order
from ._collect_html_img_srcs import _collect_html_img_srcs
async def fetch_and_upsert_product(
upsert_product,
log_product_result,
sem: asyncio.Semaphore,
slug: str,
product_slugs,
category_values,
valid_subs_by_top,
link_errors,
link_externals,
unknown_sub_paths
) -> bool:
href = suma_href_from_html_slug(slug)
try:
async with sem:
d = await scrape_product_detail(href)
is_cf, cf_token = _product_dict_is_cf(d)
if is_cf:
payload = {
"slug": slug,
"href_tried": href,
"error_type": "CloudflareChallengeDetected",
"error_message": f"Detected Cloudflare interstitial via token: {cf_token}",
"cf_token": cf_token,
}
await log_product_result(ok=False, payload=payload)
log(f" ! CF challenge detected: {slug} ({cf_token})")
return False
# Rewrite embedded links; collect reports
if d.get("description_html"):
d["description_html"] = _rewrite_links_fragment(
d["description_html"], config()["base_url"], product_slugs, category_values,
valid_subs_by_top, slug, link_errors, link_externals, unknown_sub_paths
)
d["description_html"] = to_fragment(d["description_html"])
if d.get("sections"):
for sec in d["sections"]:
if isinstance(sec, dict) and sec.get("html"):
sec["html"] = _rewrite_links_fragment(
sec["html"], config()["base_url"], product_slugs, category_values,
valid_subs_by_top, slug, link_errors, link_externals, unknown_sub_paths
)
sec["html"] = to_fragment(sec["html"])
# Images
gallery = _dedupe_preserve_order(d.get("images") or [])
embedded: List[str] = []
if d.get("description_html"):
embedded += _collect_html_img_srcs(d["description_html"])
for sec in d.get("sections", []) or []:
if isinstance(sec, dict) and sec.get("html"):
embedded += _collect_html_img_srcs(sec["html"])
embedded = _dedupe_preserve_order(embedded)
all_imgs = _dedupe_preserve_order(list(gallery) + list(embedded))
d["images"] = gallery
d["embedded_image_urls"] = embedded
d["all_image_urls"] = all_imgs
await upsert_product(slug, href, d)
# DB: upsert product + success log
return True
except Exception as e:
payload = {
"slug": slug,
"href_tried": href,
"error_type": e.__class__.__name__,
"error_message": str(e),
}
try:
if isinstance(e, httpx.HTTPStatusError):
payload["http_status"] = getattr(e.response, "status_code", None)
req = getattr(e, "request", None)
if req is not None and getattr(req, "url", None) is not None:
payload["final_url"] = str(req.url)
elif isinstance(e, httpx.TransportError):
payload["transport_error"] = True
except Exception:
pass
await log_product_result(ok=False, payload=payload)
log(f" ! product failed: {slug} ({e})")
return False

View File

@@ -0,0 +1,49 @@
import asyncio
from typing import Dict, List, Optional, Set
from config import config
from utils import log
from .fetch_and_upsert_product import fetch_and_upsert_product
async def fetch_and_upsert_products(
upsert_product,
log_product_result,
save_link_reports=None,
concurrency: int = 8,
product_slugs: Optional[Set[str]] = None,
valid_subs_by_top: Optional[Dict[str, Set[str]]] = None,
unknown_sub_paths: Optional[Set[str]] = None,
):
# avoid mutable default arguments: a shared set/dict would leak state across calls
if product_slugs is None:
product_slugs = set()
if valid_subs_by_top is None:
valid_subs_by_top = {}
if unknown_sub_paths is None:
unknown_sub_paths = set()
sem = asyncio.Semaphore(max(1, concurrency))
link_errors: List[Dict] = []
link_externals: List[Dict] = []
category_values: Set[str] = set(config()["categories"]["allow"].values())
to_fetch = sorted(list(product_slugs))
log(f"Fetching {len(to_fetch)} product details (concurrency={concurrency})…")
tasks = [asyncio.create_task(
fetch_and_upsert_product(
upsert_product,
log_product_result,
sem,
s,
product_slugs,
category_values,
valid_subs_by_top,
link_errors,
link_externals,
unknown_sub_paths
)
) for s in to_fetch]
done = 0
ok_count = 0
for coro in asyncio.as_completed(tasks):
ok = await coro
done += 1
if ok:
ok_count += 1
if done % 50 == 0 or done == len(tasks):
log(f"{done}/{len(tasks)} saved (ok={ok_count})")
if save_link_reports:
await save_link_reports(link_errors, link_externals)

View File

@@ -0,0 +1,24 @@
from typing import Dict
from urllib.parse import urljoin
from config import config
def rewrite_nav(nav: Dict[str, Dict], nav_redirects:Dict[str, str]):
if nav_redirects:
for label, data in (nav.get("cats") or {}).items():
top_slug = (data or {}).get("slug")
if not top_slug:
continue
new_subs = []
for s in (data.get("subs") or []):
old_sub = (s.get("slug") or "").strip()
if not old_sub:
continue
old_path = f"/{top_slug}/{old_sub}"
canonical_path = nav_redirects.get(old_path, old_path)
parts = [x for x in canonical_path.split("/") if x]
if len(parts) < 2:
continue
top2, sub2 = parts[0], parts[1]
s["slug"] = sub2
s["href"] = urljoin(config()["base_url"], f"/{top2}/{sub2}")
new_subs.append(s)
data["subs"] = new_subs

View File

@@ -0,0 +1,16 @@
from typing import Dict, Set
# build the set of valid sub-slugs for each top-level category in nav
def valid_subs(nav: Dict[str, Dict]) -> Dict[str, Set[str]]:
valid_subs_by_top: Dict[str, Set[str]] = {}
for label, data in (nav.get("cats") or {}).items():
top_slug = (data or {}).get("slug")
if not top_slug:
continue
subs_set = {
(s.get("slug") or "").strip()
for s in (data.get("subs") or [])
if s.get("slug")
}
valid_subs_by_top[top_slug] = subs_set
return valid_subs_by_top

244
scrape/get_auth.py Normal file
View File

@@ -0,0 +1,244 @@
from typing import Optional, Dict, Any, List
from urllib.parse import urljoin
import httpx
from bs4 import BeautifulSoup
from config import config
class LoginFailed(Exception):
def __init__(self, message: str, *, debug: Dict[str, Any]):
super().__init__(message)
self.debug = debug
def _ff_headers(referer: Optional[str] = None, origin: Optional[str] = None) -> Dict[str, str]:
h = {
"User-Agent": "Mozilla/5.0 (X11; Linux x86_64; rv:142.0) Gecko/20100101 Firefox/142.0",
"Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
"Accept-Language": "en-GB,en;q=0.5",
"Accept-Encoding": "gzip, deflate, br, zstd",
"Connection": "keep-alive",
"Upgrade-Insecure-Requests": "1",
"DNT": "1",
"Sec-GPC": "1",
"Cache-Control": "no-cache",
"Pragma": "no-cache",
}
if referer:
h["Referer"] = referer
if origin:
h["Origin"] = origin
return h
def _cookie_header_from_jar(jar: httpx.Cookies, domain: str, path: str = "/") -> str:
pairs: List[str] = []
for c in jar.jar:
if not c.name or c.value is None:
continue
dom = (c.domain or "").lstrip(".")
if not dom:
continue
if not (domain == dom or domain.endswith("." + dom) or dom.endswith("." + domain)):
continue
if not (path.startswith(c.path or "/")):
continue
pairs.append(f"{c.name}={c.value}")
return "; ".join(pairs)
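The jar filter above keeps a cookie when its domain and the request domain match in either direction after stripping a leading dot. A standalone sketch of that rule (the example domains are illustrative):

```python
def domain_matches(request_domain: str, cookie_domain: str) -> bool:
    # Mirror of the check in _cookie_header_from_jar: strip a leading dot,
    # then accept an exact match or a suffix match in either direction.
    dom = cookie_domain.lstrip(".")
    if not dom:
        return False
    return (
        request_domain == dom
        or request_domain.endswith("." + dom)
        or dom.endswith("." + request_domain)
    )

print(domain_matches("wholesale.suma.coop", ".suma.coop"))   # → True
print(domain_matches("wholesale.suma.coop", "example.com"))  # → False
```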
def _extract_magento_errors(html_text: str) -> list[str]:
msgs: list[str] = []
try:
soup = BeautifulSoup(html_text or "", "lxml")
for sel in [
".message-error",
".messages .message-error",
".page.messages .message-error",
"[data-ui-id='message-error']",
".message.warning",
".message.notice",
]:
for box in soup.select(sel):
t = " ".join((box.get_text(" ") or "").split())
if t and t not in msgs:
msgs.append(t)
except Exception:
pass
return msgs
def _looks_like_login_page(html_text: str) -> bool:
try:
s = BeautifulSoup(html_text or "", "lxml")
if s.select_one("form#login-form.form-login"):
return True
title = (s.title.get_text() if s.title else "").strip().lower()
if "customer login" in title:
return True
except Exception:
pass
return False
def _chrome_headers(referer=None, origin=None):
headers = {
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
"(KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36",
"Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
"Accept-Language": "en-US,en;q=0.5",
"Accept-Encoding": "gzip, deflate, br",
"Connection": "keep-alive",
"Upgrade-Insecure-Requests": "1",
}
if referer:
headers["Referer"] = referer
if origin:
headers["Origin"] = origin
return headers
async def login(
username: str,
password: str,
*,
extra_cookies: Optional[Dict[str, str]] = None,  # ok to pass cf_clearance etc., but NOT form_key
timeout: float = 30.0,
) -> httpx.Cookies:
"""
Attempt login and return an authenticated cookie jar.
Success criteria (strict):
1) /customer/section/load?sections=customer reports is_logged_in == True
OR
2) GET /customer/account/ resolves to an account page (not the login page).
Otherwise raises LoginFailed with debug info.
"""
limits = httpx.Limits(max_connections=10, max_keepalive_connections=6)
cookies = httpx.Cookies()
for k, v in {
**(extra_cookies or {}),
"pr-cookie-consent": '["all"]',
"user_allowed_save_cookie": '{"1":1}',
}.items():
if k.lower() == "form_key":
continue
cookies.set(k, v, domain="wholesale.suma.coop", path="/")
base_login = config()["base_login"]
base_url = config()["base_url"]
async with httpx.AsyncClient(
follow_redirects=True,
timeout=httpx.Timeout(timeout, connect=15.0),
http2=True,
limits=limits,
cookies=cookies,
headers=_chrome_headers(),
trust_env=True,
) as client:
# 1) GET login page for a fresh form_key
r_get = await client.get(base_login, headers=_chrome_headers())
if r_get.status_code >= 400:
print("Login GET failed. Status:", r_get.status_code)
print("Login GET URL:", r_get.url)
print("Response text:", r_get.text[:1000])  # trimmed
r_get.raise_for_status()
soup = BeautifulSoup(r_get.text, "lxml")
form = soup.select_one("form.form.form-login#login-form") or soup.select_one("#login-form")
if not form:
raise LoginFailed(
"Login form not found (possible bot challenge or theme change).",
debug={"get_status": r_get.status_code, "final_url": str(r_get.url)},
)
action = urljoin(base_login, form.get("action") or base_login)
fk_el = form.find("input", attrs={"name": "form_key"})
hidden_form_key = (fk_el.get("value") if fk_el else "") or ""
# mirror Magento behavior: form_key also appears as a cookie
client.cookies.set("form_key", hidden_form_key, domain="wholesale.suma.coop", path="/")
payload = {
"form_key": hidden_form_key,
"login[username]": username,
"login[password]": password,
"send": "Login",
}
post_headers = _chrome_headers(referer=base_login, origin=base_url)
post_headers["Content-Type"] = "application/x-www-form-urlencoded"
post_headers["Cookie"] = _cookie_header_from_jar(
client.cookies, domain="wholesale.suma.coop", path="/customer/"
)
r_post = await client.post(action, data=payload, headers=post_headers)
# 2) Primary check: sections API must say logged in
is_logged_in = False
sections_url = urljoin(base_url, "/customer/section/load/?sections=customer&force_new_section_timestamp=1")
section_json: Dict[str, Any] = {}
try:
r_sec = await client.get(sections_url, headers=_chrome_headers(referer=base_login))
if r_sec.status_code == 200:
section_json = r_sec.json()
cust = section_json.get("customer") or {}
is_logged_in = bool(cust.get("is_logged_in"))
except Exception:
pass
# 3) Secondary check: account page should NOT be the login page
looks_like_login = False
final_account_url = ""
try:
r_acc = await client.get(urljoin(base_url, "/customer/account/"), headers=_chrome_headers(referer=base_login))
final_account_url = str(r_acc.url)
looks_like_login = (
"/customer/account/login" in final_account_url
or _looks_like_login_page(r_acc.text)
)
except Exception:
# ignore; we'll rely on section status
pass
# Decide success/failure strictly
if not (is_logged_in or (final_account_url and not looks_like_login)):
errors = _extract_magento_errors(r_post.text)
# Clean up transient form_key cookie
try:
client.cookies.jar.clear("wholesale.suma.coop", "/", "form_key")
except Exception:
pass
raise LoginFailed(
errors[0] if errors else "Invalid username or password.",
debug={
"get_status": r_get.status_code,
"post_status": r_post.status_code,
"post_final_url": str(r_post.url),
"sections_customer": section_json.get("customer"),
"account_final_url": final_account_url,
"looks_like_login_page": looks_like_login,
},
)
def clear_cookie_everywhere(cookies: httpx.Cookies, name: str) -> None:
to_delete = []
for c in list(cookies.jar): # http.cookiejar.Cookie objects
if c.name == name:
# Note: CookieJar.clear requires exact (domain, path, name)
to_delete.append((c.domain, c.path, c.name))
for domain, path, nm in to_delete:
try:
cookies.jar.clear(domain, path, nm)
except KeyError:
# Mismatch can happen if domain has a leading dot vs not, etc.
# Try again with a normalized domain variant.
if domain and domain.startswith("."):
cookies.jar.clear(domain.lstrip("."), path, nm)
else:
# or try with leading dot
cookies.jar.clear("." + domain, path, nm)
if name in cookies:
del cookies[name]
clear_cookie_everywhere(client.cookies, "form_key")
return client.cookies

scrape/html_utils.py Normal file

@@ -0,0 +1,44 @@
# scrape/html_utils.py
from __future__ import annotations
from typing import Optional
from bs4 import BeautifulSoup
from urllib.parse import urljoin
from config import config
def to_fragment(html: Optional[str]) -> str:
"""Return just the fragment contents (no <html>/<body> wrappers)."""
if not html:
return ""
soup = BeautifulSoup(html, "lxml")
# unwrap document-level containers
for t in soup.find_all(["html", "body"]):
t.unwrap()
return "".join(str(c) for c in soup.contents).strip()
def absolutize_fragment(html: Optional[str]) -> str:
"""Absolutize href/src against BASE_URL and return a fragment (no wrappers)."""
if not html:
return ""
frag = BeautifulSoup(html, "lxml")
for tag in frag.find_all(True):
if tag.has_attr("href"):
raw = str(tag["href"])
abs_href = urljoin(config()["base_url"], raw) if raw.startswith("/") else raw
tag["href"] = abs_href
if tag.has_attr("src"):
raw = str(tag["src"])
tag["src"] = urljoin(config()["base_url"], raw) if raw.startswith("/") else raw
# unwrap wrappers and return only the inner HTML
for t in frag.find_all(["html", "body"]):
t.unwrap()
return "".join(str(c) for c in frag.contents).strip()

scrape/http_client.py Normal file

@@ -0,0 +1,220 @@
# scrape/http_client.py
from __future__ import annotations
import asyncio
import os
import secrets
from typing import Optional, Dict
import httpx
from config import config
_CLIENT: httpx.AsyncClient | None = None
# ----- optional decoders -> Accept-Encoding
BROTLI_OK = False
ZSTD_OK = False
try:
import brotli # noqa: F401
BROTLI_OK = True
except Exception:
pass
try:
import zstandard as zstd # noqa: F401
ZSTD_OK = True
except Exception:
pass
def _accept_encoding() -> str:
enc = ["gzip", "deflate"]
if BROTLI_OK:
enc.append("br")
if ZSTD_OK:
enc.append("zstd")
return ", ".join(enc)
FIREFOX_UA = "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:142.0) Gecko/20100101 Firefox/142.0"
def _ff_headers(referer: Optional[str] = None) -> Dict[str, str]:
h = {
"User-Agent": FIREFOX_UA,
"Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
"Accept-Language": "en-GB,en;q=0.5",
"Accept-Encoding": _accept_encoding(),
"Connection": "keep-alive",
"Upgrade-Insecure-Requests": "1",
"Sec-Fetch-Dest": "document",
"Sec-Fetch-Mode": "navigate",
"Sec-Fetch-Site": "none" if not referer else "same-origin",
"Sec-Fetch-User": "?1",
"DNT": "1",
"Sec-GPC": "1",
"Priority": "u=0, i",
"Cache-Control": "no-cache",
"Pragma": "no-cache",
}
if referer:
h["Referer"] = referer
return h
def _chrome_headers(referer=None, origin=None):
headers = {
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
"(KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36",
"Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
"Accept-Language": "en-US,en;q=0.5",
"Accept-Encoding": "gzip, deflate, br",
"Connection": "keep-alive",
"Upgrade-Insecure-Requests": "1",
}
if referer:
headers["Referer"] = referer
if origin:
headers["Origin"] = origin
return headers
def _parse_cookie_header(cookie_header: str) -> Dict[str, str]:
jar: Dict[str, str] = {}
for part in cookie_header.split(";"):
part = part.strip()
if not part or "=" not in part:
continue
k, v = part.split("=", 1)
jar[k.strip()] = v.strip()
return jar
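A quick standalone check of the parsing above: pairs split on ";", the first "=" separates name from value, and fragments without "=" are dropped (header value here is illustrative):

```python
from typing import Dict

def parse_cookie_header(cookie_header: str) -> Dict[str, str]:
    # Same rule as _parse_cookie_header above.
    jar: Dict[str, str] = {}
    for part in cookie_header.split(";"):
        part = part.strip()
        if not part or "=" not in part:
            continue
        k, v = part.split("=", 1)
        jar[k.strip()] = v.strip()
    return jar

print(parse_cookie_header("form_key=abc; session=x=y; junk"))
# → {'form_key': 'abc', 'session': 'x=y'}
```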
def _looks_like_cloudflare(html: bytes) -> bool:
if not html:
return False
s = html[:40000].lower()
return (
b"please wait while your request is being verified" in s
or b"/cdn-cgi/challenge-platform/scripts/jsd/main.js" in s
or b"rocket-loader.min.js" in s
or b"cf-ray" in s
or b"challenge-platform" in s
or b"cf-chl-" in s
)
# -------- runtime cookie configuration (preferred over env) --------------------
_INITIAL_COOKIES: Dict[str, str] = {}
_INITIAL_COOKIE_HEADER: Optional[str] = None
async def configure_cookies(cookies: Dict[str, str]) -> None:
"""
Configure initial cookies programmatically (preferred over env).
Call BEFORE the first request (i.e., before get_client()/fetch()).
If a client already exists, its jar is updated immediately.
"""
global _INITIAL_COOKIES, _INITIAL_COOKIE_HEADER
_INITIAL_COOKIE_HEADER = None
_INITIAL_COOKIES = dict(cookies or {})
# If client already built, update it now
if _CLIENT is not None:
print('configuring cookies')
host = config()["base_host"] or "wholesale.suma.coop"
for k, v in _INITIAL_COOKIES.items():
_CLIENT.cookies.set(k, v, domain=host, path="/")
def configure_cookies_from_header(cookie_header: str) -> None:
"""
Configure initial cookies from a raw 'Cookie:' header string.
Preferred over env; call BEFORE the first request.
"""
global _INITIAL_COOKIES, _INITIAL_COOKIE_HEADER
_INITIAL_COOKIE_HEADER = cookie_header or ""
_INITIAL_COOKIES = _parse_cookie_header(_INITIAL_COOKIE_HEADER)
if _CLIENT is not None:
host = config()["base_host"] or "wholesale.suma.coop"
for k, v in _INITIAL_COOKIES.items():
_CLIENT.cookies.set(k, v, domain=host, path="/")
# ------------------------------------------------------------------------------
async def get_client() -> httpx.AsyncClient:
"""Public accessor (same as _get_client)."""
return await _get_client()
async def _get_client() -> httpx.AsyncClient:
global _CLIENT
if _CLIENT is None:
timeout = httpx.Timeout(300.0, connect=150.0)
limits = httpx.Limits(max_keepalive_connections=8, max_connections=16)
_CLIENT = httpx.AsyncClient(
follow_redirects=True,
timeout=timeout,
http2=True,
limits=limits,
headers=_chrome_headers(),
trust_env=True,
)
# ---- Seed cookies (priority: runtime config > env var) ---------------
host = config()["base_host"] or "wholesale.suma.coop"
if _INITIAL_COOKIES or _INITIAL_COOKIE_HEADER:
# From runtime config
if _INITIAL_COOKIE_HEADER:
_CLIENT.cookies.update(_parse_cookie_header(_INITIAL_COOKIE_HEADER))
for k, v in _INITIAL_COOKIES.items():
_CLIENT.cookies.set(k, v, domain=host, path="/")
else:
# Fallback to environment
cookie_str = os.environ.get("SUMA_COOKIES", "").strip()
if cookie_str:
_CLIENT.cookies.update(_parse_cookie_header(cookie_str))
# Ensure private_content_version is present
if "private_content_version" not in _CLIENT.cookies:
pcv = secrets.token_hex(16)
_CLIENT.cookies.set("private_content_version", pcv, domain=host, path="/")
# ---------------------------------------------------------------------
return _CLIENT
async def aclose_client() -> None:
global _CLIENT
if _CLIENT is not None:
await _CLIENT.aclose()
_CLIENT = None
async def fetch(url: str, *, referer: Optional[str] = None, retries: int = 3) -> str:
client = await _get_client()
# Warm-up visit to look like a real session
if len(client.cookies.jar) == 0:
try:
await client.get(config()["base_url"].rstrip("/") + "/", headers=_chrome_headers())
await asyncio.sleep(0.25)
except Exception:
pass
last_exc: Optional[Exception] = None
for attempt in range(1, retries + 1):
try:
h = _chrome_headers(referer=referer or (config()["base_url"].rstrip("/") + "/"))
r = await client.get(url, headers=h)
if _looks_like_cloudflare(r.content):
if attempt < retries:
await asyncio.sleep(0.9 if attempt == 1 else 1.3)
try:
await client.get(config()["base_url"].rstrip("/") + "/", headers=_chrome_headers())
await asyncio.sleep(0.4)
except Exception:
pass
continue
try:
r.raise_for_status()
except httpx.HTTPStatusError as e:
print(f"Fetch failed for {url}")
print("Status:", r.status_code)
print("Body:", r.text[:1000]) # Trimmed
raise
return r.text
except Exception as e:
last_exc = e
if attempt >= retries:
raise
await asyncio.sleep(0.45 * attempt + 0.25)
if last_exc:
raise last_exc
raise RuntimeError("fetch failed unexpectedly")

scrape/listings.py Normal file

@@ -0,0 +1,289 @@
from __future__ import annotations
import math
import re
from typing import Callable, Dict, List, Optional, Tuple
from urllib.parse import parse_qsl, urlencode, urljoin, urlparse, urlunparse
from .http_client import fetch
from suma_browser.app.bp.browse.services.slugs import product_slug_from_href
from suma_browser.app.bp.browse.services.state import (
KNOWN_PRODUCT_SLUGS,
_listing_page_cache,
_listing_page_ttl,
_listing_variant_cache,
_listing_variant_ttl,
now,
)
from utils import normalize_text, soup_of
from config import config
def parse_total_pages_from_text(text: str) -> Optional[int]:
m = re.search(r"Showing\s+(\d+)\s+of\s+(\d+)", text, re.I)
if not m:
return None
shown = int(m.group(1))
total = int(m.group(2))
per_page = 36 if shown in (12, 24, 36) else shown
return max(1, math.ceil(total / per_page))
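The arithmetic above can be checked in isolation: the per-page count snaps to 36 whenever the "shown" figure is one of Magento's page-size options, and the page count is the ceiling of total over per-page. A self-contained sketch (sample text is illustrative):

```python
import math
import re

def total_pages(text: str):
    # Same rule as parse_total_pages_from_text above.
    m = re.search(r"Showing\s+(\d+)\s+of\s+(\d+)", text, re.I)
    if not m:
        return None
    shown, total = int(m.group(1)), int(m.group(2))
    per_page = 36 if shown in (12, 24, 36) else shown
    return max(1, math.ceil(total / per_page))

print(total_pages("Showing 36 of 120 products"))  # → 4 (ceil(120 / 36))
print(total_pages("no pagination text here"))     # → None
```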
def _first_from_srcset(val: str) -> Optional[str]:
if not val:
return None
first = val.split(",")[0].strip()
parts = first.split()
return parts[0] if parts else first
def _abs_url(u: Optional[str]) -> Optional[str]:
if not u:
return None
return urljoin(config()["base_url"], u) if isinstance(u, str) and u.startswith("/") else u
def _collect_img_candidates(el) -> List[str]:
urls: List[str] = []
if not el:
return urls
attrs = ["src", "data-src", "data-original", "data-zoom-image", "data-thumb", "content", "href"]
for a in attrs:
v = el.get(a)
if v:
urls.append(v)
for a in ["srcset", "data-srcset"]:
v = el.get(a)
if v:
first = _first_from_srcset(v)
if first:
urls.append(first)
return urls
def _dedupe_preserve_order_by(seq: List[str], key: Callable[[str], str]) -> List[str]:
seen = set()
out: List[str] = []
for s in seq:
if not s:
continue
k = key(s)
if k in seen:
continue
seen.add(k)
out.append(s)
return out
def _filename_key(u: str) -> str:
p = urlparse(u)
path = p.path or ""
if path.endswith("/"):
path = path[:-1]
last = path.split("/")[-1]
return f"{p.netloc}:{last}".lower()
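A standalone illustration of the "dedupe by filename" idea above: two CDN URLs that end in the same file name on the same host collapse to one entry (example URLs are illustrative):

```python
from urllib.parse import urlparse

def filename_key(u: str) -> str:
    # Same key as _filename_key above: host plus last path segment.
    p = urlparse(u)
    path = (p.path or "").rstrip("/")
    return f"{p.netloc}:{path.split('/')[-1]}".lower()

def dedupe_by(seq, key):
    # Same order-preserving dedupe as _dedupe_preserve_order_by above.
    seen, out = set(), []
    for s in seq:
        if not s:
            continue
        k = key(s)
        if k not in seen:
            seen.add(k)
            out.append(s)
    return out

urls = [
    "https://cdn.example.com/media/a/b/pic.jpg",
    "https://cdn.example.com/cache/1x/a/b/pic.jpg",  # same file name → dropped
    "https://cdn.example.com/media/a/b/other.jpg",
]
print(dedupe_by(urls, filename_key))
```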
def _parse_cards_from_soup(soup) -> List[str]:
"""Extract product slugs from listing soup.
De-duplicate by slug to avoid doubles from overlapping selectors."""
items: List[str] = []
seen_slugs: set[str] = set()
# Primary selectors (Magento 2 default)
card_wrappers = soup.select(
"li.product-item, .product-item, ol.products.list.items li, .products.list.items li, .product-item-info"
)
for card in card_wrappers:
a = (
card.select_one("a.product-item-link")
or card.select_one(".product-item-name a")
or card.select_one("a[href$='.html'], a[href$='.htm']")
)
if not a:
continue
href = a.get("href")
if not href:
continue
if href.startswith("/"):
href = urljoin(config()["base_url"], href)
slug = product_slug_from_href(href)
if not slug:
continue
KNOWN_PRODUCT_SLUGS.add(slug)
if slug not in seen_slugs:
seen_slugs.add(slug)
items.append(slug)
# Secondary: any product-looking anchors inside products container
if not items:
products_container = soup.select_one(".products") or soup
for a in products_container.select("a[href$='.html'], a[href$='.htm']"):
href = a.get("href")
if not href:
continue
if href.startswith("/"):
href = urljoin(config()["base_url"], href)
slug = product_slug_from_href(href)
if not slug:
continue
KNOWN_PRODUCT_SLUGS.add(slug)
if slug not in seen_slugs:
seen_slugs.add(slug)
items.append(slug)
# Tertiary: JSON-LD fallback (ItemList/Product)
if not items:
import json
def add_product(name: Optional[str], url: Optional[str], image: Optional[str]):
if not url:
return
absu = urljoin(config()["base_url"], url) if url.startswith("/") else url
slug = product_slug_from_href(absu)
if not slug:
return
KNOWN_PRODUCT_SLUGS.add(slug)
if slug not in seen_slugs:
seen_slugs.add(slug)
items.append(slug)
for script in soup.find_all("script", attrs={"type": "application/ld+json"}):
try:
data = json.loads(script.get_text())
except Exception:
continue
if isinstance(data, dict):
if data.get("@type") == "ItemList" and isinstance(data.get("itemListElement"), list):
for it in data["itemListElement"]:
if isinstance(it, dict):
ent = it.get("item") or it
if isinstance(ent, dict):
add_product(
ent.get("name"),
ent.get("url"),
(ent.get("image") if isinstance(ent.get("image"), str) else None),
)
if data.get("@type") == "Product":
add_product(
data.get("name"),
data.get("url"),
(data.get("image") if isinstance(data.get("image"), str) else None),
)
elif isinstance(data, list):
for ent in data:
if not isinstance(ent, dict):
continue
if ent.get("@type") == "Product":
add_product(
ent.get("name"),
ent.get("url"),
(ent.get("image") if isinstance(ent.get("image"), str) else None),
)
if ent.get("@type") == "ItemList":
for it in ent.get("itemListElement", []):
if isinstance(it, dict):
obj = it.get("item") or it
if isinstance(obj, dict):
add_product(
obj.get("name"),
obj.get("url"),
(obj.get("image") if isinstance(obj.get("image"), str) else None),
)
return items
def _with_query(url: str, add: Dict[str, str]) -> str:
p = urlparse(url)
q = dict(parse_qsl(p.query, keep_blank_values=True))
q.update(add)
new_q = urlencode(q)
return urlunparse((p.scheme, p.netloc, p.path, p.params, new_q, p.fragment))
def _with_page(url: str, page: int) -> str:
if page and page > 1:
return _with_query(url, {"p": str(page)})
return url
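A self-contained sketch of the query merging above (the example URL is illustrative): page 1 leaves the URL untouched, later pages add or overwrite the `p` parameter:

```python
from urllib.parse import parse_qsl, urlencode, urlparse, urlunparse

def with_page(url: str, page: int) -> str:
    # Same behavior as _with_page/_with_query above.
    if not page or page <= 1:
        return url
    p = urlparse(url)
    q = dict(parse_qsl(p.query, keep_blank_values=True))
    q["p"] = str(page)
    return urlunparse((p.scheme, p.netloc, p.path, p.params, urlencode(q), p.fragment))

url3 = with_page("https://example.com/groceries/tea?order=name", 3)
print(url3)  # → https://example.com/groceries/tea?order=name&p=3
print(with_page("https://example.com/groceries/tea", 1))  # unchanged
```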
def _listing_base_key(url: str) -> str:
p = urlparse(url)
path = p.path.rstrip("/")
return f"{p.scheme}://{p.netloc}{path}".lower()
def _variant_cache_get(base_key: str) -> Optional[str]:
info = _listing_variant_cache.get(base_key)
if not info:
return None
url, ts = info
if (now() - ts) > _listing_variant_ttl:
_listing_variant_cache.pop(base_key, None)
return None
return url
def _variant_cache_set(base_key: str, working_url: str) -> None:
_listing_variant_cache[base_key] = (working_url, now())
def _page_cache_get(working_url: str, page: int) -> Optional[Tuple[List[str], int]]:
key = f"{working_url}|p={page}"
info = _listing_page_cache.get(key)
if not info:
return None
(items, total_pages), ts = info
if (now() - ts) > _listing_page_ttl:
_listing_page_cache.pop(key, None)
return None
return items, total_pages
def _page_cache_set(working_url: str, page: int, items: List[str], total_pages: int) -> None:
key = f"{working_url}|p={page}"
_listing_page_cache[key] = ((items, total_pages), now())
async def _fetch_parse(url: str, page: int):
html = await fetch(_with_page(url, page))
soup = soup_of(html)
items = _parse_cards_from_soup(soup)
return items, soup
async def scrape_products(list_url: str, page: int = 1):
"""Fetch one listing page; return (product slugs, total page count)."""
items, soup = await _fetch_parse(list_url, page)
total_pages = _derive_total_pages(soup)
return items, total_pages
def _derive_total_pages(soup) -> int:
total_pages = 1
textdump = normalize_text(soup.get_text(" "))
pages_from_text = parse_total_pages_from_text(textdump)
if pages_from_text:
total_pages = pages_from_text
else:
pages = {1}
for a in soup.find_all("a", href=True):
m = re.search(r"[?&]p=(\d+)", a["href"])
if m:
pages.add(int(m.group(1)))
total_pages = max(pages) if pages else 1
return total_pages
def _slugs_from_list_url(list_url: str) -> Tuple[str, Optional[str]]:
p = urlparse(list_url)
parts = [x for x in (p.path or "").split("/") if x]
top = parts[0].lower() if parts else ""
sub = None
if len(parts) >= 2:
sub = parts[1]
if sub.lower().endswith((".html", ".htm")):
sub = re.sub(r"\.html?$", "", sub, flags=re.I)
return top, sub

scrape/nav.py Normal file

@@ -0,0 +1,104 @@
from __future__ import annotations
import re
from typing import Dict, List, Tuple, Optional
from urllib.parse import urlparse, urljoin
from bs4 import BeautifulSoup
from config import config
from .http_client import fetch # only fetch; define soup_of locally
def soup_of(html: str) -> BeautifulSoup:
return BeautifulSoup(html or "", "lxml")
def normalize_text(s: str) -> str:
return re.sub(r"\s+", " ", (s or "").strip())
async def scrape_nav_raw() -> List[Tuple[str, str]]:
html = await fetch(config()["base_url"])
soup = soup_of(html)
results: List[Tuple[str, str]] = []
for a in soup.find_all("a", href=True):
text = normalize_text(a.get_text())
if not text:
continue
href = a["href"].strip()
if href.startswith("/"):
href = urljoin(config()["base_url"], href)
if not href.startswith(config()["base_url"]):
continue
results.append((text, href))
return results
def extract_sub_slug(href: str, top_slug: str) -> Optional[str]:
p = urlparse(href)
parts = [x for x in (p.path or "").split("/") if x]
if len(parts) >= 2 and parts[0].lower() == top_slug.lower():
sub = parts[1]
if sub.lower().endswith((".html", ".htm")):
sub = re.sub(r"\.html?$", "", sub, flags=re.I)
return sub
return None
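The slug extraction above reduces to: take the second path segment when the first segment matches the top slug, and strip any .html/.htm extension. A standalone version (example URLs are illustrative):

```python
import re
from urllib.parse import urlparse

def extract_sub_slug(href: str, top_slug: str):
    # Same rule as extract_sub_slug above.
    parts = [x for x in (urlparse(href).path or "").split("/") if x]
    if len(parts) >= 2 and parts[0].lower() == top_slug.lower():
        return re.sub(r"\.html?$", "", parts[1], flags=re.I)
    return None

print(extract_sub_slug("https://example.com/groceries/herbal-tea.html", "groceries"))  # → herbal-tea
print(extract_sub_slug("https://example.com/about", "groceries"))                      # → None
```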
async def group_by_category(slug_to_links: Dict[str, List[Tuple[str, str]]]) -> Dict[str, Dict]:
nav = {"cats": {}}
for label, slug in config()["categories"]["allow"].items():
top_href = urljoin(config()["base_url"], f"/{slug}")
subs = []
for text, href in slug_to_links.get(slug, []):
sub_slug = extract_sub_slug(href, slug)
if sub_slug:
subs.append({"name": text, "href": href, "slug": sub_slug})
subs.sort(key=lambda x: x["name"].lower())
nav["cats"][label] = {"href": top_href, "slug": slug, "subs": subs}
return nav
async def scrape_nav_filtered() -> Dict[str, Dict]:
anchors = await scrape_nav_raw()
slug_to_links: Dict[str, List[Tuple[str, str]]] = {}
for text, href in anchors:
p = urlparse(href)
parts = [x for x in (p.path or "").split("/") if x]
if not parts:
continue
top = parts[0].lower()
if top in config()["slugs"]["skip"]:
continue
slug_to_links.setdefault(top, []).append((text, href))
return await group_by_category(slug_to_links)
async def nav_scrape() -> Dict[str, Dict]:
"""Return the scraped navigation structure."""
return await scrape_nav_filtered()


@@ -0,0 +1,6 @@
from .upsert_product import upsert_product
from .log_product_result import log_product_result
from .save_nav import save_nav
from .save_subcategory_redirects import save_subcategory_redirects
from .capture_listing import capture_listing


@@ -0,0 +1,27 @@
# Post a captured listing page (URL, product slugs, total pages) to the market API.
import os
import httpx
from typing import List
async def capture_listing(
url: str,
items: List[str],
total_pages: int
):
sync_url = os.getenv("CAPTURE_LISTING_URL", "http://localhost:8000/market/api/products/listing/")
async with httpx.AsyncClient(timeout=httpx.Timeout(20.0, connect=10.0)) as client:
_d = {
"url": url,
"items": items,
"total_pages": total_pages
}
resp = await client.post(sync_url, json=_d)
# Raise for non-2xx
resp.raise_for_status()
data = resp.json() if resp.content else {}
return data


@@ -0,0 +1,24 @@
# Post a per-product scrape result (ok flag + payload) to the market log API.
import os
import httpx
async def log_product_result(
ok: bool,
payload
):
sync_url = os.getenv("PRODUCT_LOG_URL", "http://localhost:8000/market/api/products/log/")
async with httpx.AsyncClient(timeout=httpx.Timeout(20.0, connect=10.0)) as client:
_d = {
"ok": ok,
"payload": payload
}
resp = await client.post(sync_url, json=_d)
# Raise for non-2xx
resp.raise_for_status()
data = resp.json() if resp.content else {}
return data


@@ -0,0 +1,19 @@
# Post the scraped navigation structure to the market API.
import os
import httpx
from typing import Dict
async def save_nav(
nav: Dict,
):
sync_url = os.getenv("SAVE_NAV_URL", "http://localhost:8000/market/api/products/nav/")
async with httpx.AsyncClient(timeout=httpx.Timeout(20.0, connect=10.0)) as client:
resp = await client.post(sync_url, json=nav)
# Raise for non-2xx
resp.raise_for_status()
data = resp.json() if resp.content else {}
return data


@@ -0,0 +1,15 @@
import os
import httpx
from typing import Dict
async def save_subcategory_redirects(mapping: Dict[str, str]) -> Dict:
sync_url = os.getenv("SAVE_REDIRECTS", "http://localhost:8000/market/api/products/redirects/")
async with httpx.AsyncClient(timeout=httpx.Timeout(20.0, connect=10.0)) as client:
resp = await client.post(sync_url, json=mapping)
# Raise for non-2xx
resp.raise_for_status()
data = resp.json() if resp.content else {}
return data


@@ -0,0 +1,256 @@
# Post a scraped product dict to the market sync API, retrying until it succeeds.
import os
import httpx
from typing import Dict, List, Any
async def upsert_product(
slug,
href,
d,
):
"""
POST the product dict `d` to the products sync endpoint,
retrying until the server accepts it.
"""
# Ensure slug in payload matches the function arg if present
if not d.get("slug"):
d["slug"] = slug
# Where to post; override via env if needed
sync_url = os.getenv("PRODUCT_SYNC_URL", "http://localhost:8000/market/api/products/sync/")
payload = _massage_payload(d)
async def _do_call() -> Dict[str, Any]:
async with httpx.AsyncClient(timeout=httpx.Timeout(20.0, connect=10.0)) as client:
resp = await client.post(sync_url, json=payload)
resp.raise_for_status()
# tolerate empty body
if not resp.content:
return {}
# prefer JSON if possible, otherwise return text
try:
return resp.json()
except ValueError:
return {"raw": resp.text}
async def _log_error(exc: BaseException) -> None:
# Optional: add your own logging here
print(f"[upsert_product] POST failed: {type(exc).__name__}: {exc}. Retrying in 5s... slug={slug} url={sync_url}")
return await retry_until_success(_do_call, delay=5.0, on_error=_log_error)
import asyncio
from typing import Any, Awaitable, Callable, Dict, Optional
async def retry_until_success(
fn: Callable[[], Awaitable[Any]],
*,
delay: float = 5.0,
on_error: Optional[Callable[[BaseException], Awaitable[None]]] = None,
) -> Any:
"""
Repeatedly call the async no-arg function `fn` until it succeeds (returns without raising).
Waits `delay` seconds between attempts. Never gives up.
If provided, `on_error(exc)` is awaited after each failure.
"""
attempt = 0
while True:
try:
return await fn()
except asyncio.CancelledError:
# bubble up cancellations immediately
raise
except BaseException as exc:
attempt += 1
if on_error is not None:
try:
await on_error(exc)
except Exception:
# don't let error handler failures prevent retrying
pass
else:
# fallback log when no on_error handler is supplied
print(f"[retry] attempt {attempt} failed: {type(exc).__name__}: {exc}")
await asyncio.sleep(delay)
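To see the retry loop's behavior without waiting five seconds per attempt, here is a self-contained run (the helper is inlined and the delay shortened for illustration):

```python
import asyncio
from typing import Any, Awaitable, Callable

async def retry_until_success(fn: Callable[[], Awaitable[Any]], *, delay: float = 5.0) -> Any:
    # Same shape as the helper above: loop forever, sleep between failures.
    while True:
        try:
            return await fn()
        except asyncio.CancelledError:
            raise
        except BaseException as exc:
            print(f"[retry] failed: {type(exc).__name__}: {exc}")
            await asyncio.sleep(delay)

calls = {"n": 0}

async def flaky() -> str:
    # Fails twice, then succeeds on the third call.
    calls["n"] += 1
    if calls["n"] < 3:
        raise RuntimeError("transient")
    return "ok"

result = asyncio.run(retry_until_success(flaky, delay=0.01))
print(result, calls["n"])  # → ok 3
```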
def _get(d, key, default=None):
v = d.get(key)
return default if v in (None, "", [], {}) else v
def _massage_payload(d: Dict[str, Any]) -> Dict[str, Any]:
"""Mirror the DB-upsert massaging so the API sees the same structure/values."""
slug = d.get("slug")
if not slug:
raise ValueError("product missing slug")
# --- Top-level fields (use _get where DB upsert uses it) ---
out: Dict[str, Any] = {
"slug": slug,
"title": _get(d, "title"),
"image": _get(d, "image"),
"description_short": _get(d, "description_short"),
"description_html": _get(d, "description_html"),
"suma_href": _get(d, "suma_href"),
"brand": _get(d, "brand"),
"rrp": _get(d, "rrp"),
"rrp_currency": _get(d, "rrp_currency"),
"rrp_raw": _get(d, "rrp_raw"),
"price_per_unit": _get(d, "price_per_unit"),
"price_per_unit_currency": _get(d, "price_per_unit_currency"),
"price_per_unit_raw": _get(d, "price_per_unit_raw"),
"special_price": _get(d, "special_price"),
"special_price_currency": _get(d, "special_price_currency"),
"special_price_raw": _get(d, "special_price_raw"),
"regular_price": _get(d, "regular_price"),
"regular_price_currency": _get(d, "regular_price_currency"),
"regular_price_raw": _get(d, "regular_price_raw"),
"case_size_count": _get(d, "case_size_count"),
"case_size_item_qty": _get(d, "case_size_item_qty"),
"case_size_item_unit": _get(d, "case_size_item_unit"),
"case_size_raw": _get(d, "case_size_raw"),
"ean": d.get("ean") or d.get("barcode") or None,
"sku": d.get("sku"),
"unit_size": d.get("unit_size"),
"pack_size": d.get("pack_size"),
}
# --- Sections: only dicts with title+html (like DB sync) ---
sections_in = d.get("sections") or []
sections_out: List[Dict[str, Any]] = []
for sec in sections_in:
if isinstance(sec, dict) and sec.get("title") and sec.get("html"):
sections_out.append({"title": sec["title"], "html": sec["html"]})
out["sections"] = sections_out
# --- Images: same 3 buckets used in DB sync ---
def _coerce_str_list(x):
if not x:
return []
# accept list of strings or list of dicts with {"url": ...}
out_urls = []
for item in x:
if isinstance(item, str):
if item:
out_urls.append(item)
elif isinstance(item, dict):
u = item.get("url")
if u:
out_urls.append(u)
return out_urls
out["images"] = _coerce_str_list(d.get("images"))
out["embedded_image_urls"] = _coerce_str_list(d.get("embedded_image_urls"))
out["all_image_urls"] = _coerce_str_list(d.get("all_image_urls"))
# --- Labels: strip (DB code trims) ---
labels_in = d.get("labels") or []
out["labels"] = [str(x).strip() for x in labels_in if x]
# --- Stickers: strip + lower (DB code lower-cases) ---
stickers_in = d.get("stickers") or []
out["stickers"] = [str(x).strip().lower() for x in stickers_in if x]
# --- Attributes: pass through the same dict sources the DB code reads ---
out["info_table"] = d.get("info_table") or {}
#out["oe_list_price"] = d.get("oe_list_price") or {}
# --- Nutrition: allow dict or list of dicts, mirroring DB code ---
nutrition = d.get("nutrition") or []
if isinstance(nutrition, dict):
out["nutrition"] = {str(k).strip(): (None if v is None else str(v)) for k, v in nutrition.items()}
elif isinstance(nutrition, list):
rows = []
for row in nutrition:
if not isinstance(row, dict):
continue
key = str(row.get("key") or "").strip()
if not key:
continue
rows.append({
"key": key,
"value": None if row.get("value") is None else str(row.get("value")),
"unit": None if row.get("unit") is None else str(row.get("unit")),
})
out["nutrition"] = rows
else:
out["nutrition"] = []
# --- Allergens: accept str (→ contains=True) or dict ---
alls_in = d.get("allergens") or []
alls_out = []
for a in alls_in:
if isinstance(a, str):
nm, contains = a.strip(), True
elif isinstance(a, dict):
nm, contains = (a.get("name") or "").strip(), bool(a.get("contains", True))
else:
continue
if nm:
alls_out.append({"name": nm, "contains": contains})
out["allergens"] = alls_out
    out["images"] = [
        {"url": s.strip(), "kind": "gallery", "position": i}
        for i, s in enumerate(out.get("images") or [])
        if isinstance(s, str) and s.strip()
    ] + [
        {"url": s.strip(), "kind": "embedded", "position": i}
        for i, s in enumerate(out.get("embedded_image_urls") or [])
        if isinstance(s, str) and s.strip()
    ] + [
        {"url": s.strip(), "kind": "all", "position": i}
        for i, s in enumerate(out.get("all_image_urls") or [])
        if isinstance(s, str) and s.strip()
    ]
    out["labels"] = [{"name": s.strip()} for s in out["labels"] if isinstance(s, str) and s.strip()]
    out["stickers"] = [{"name": s.strip()} for s in out["stickers"] if isinstance(s, str) and s.strip()]
out["attributes"] = build_attributes_list(d)
return out
def build_attributes_list(d: Dict[str, Any]) -> List[Dict[str, Any]]:
attrs = []
for src, prefix in [
(d.get("info_table") or {}, "info_table"),
(d.get("oe_list_price") or {}, "oe_list_price"),
]:
for k, v in src.items():
key = f"{prefix}/{str(k).strip()}"
val = None if v is None else str(v)
attrs.append({"key": key, "value": val})
# optional: dedupe by (key, value)
seen = set()
dedup = []
for item in attrs:
t = (item["key"], item["value"])
if t in seen:
continue
seen.add(t)
dedup.append(item)
return dedup


@@ -0,0 +1,7 @@
from .log_product_result import log_product_result
from .upsert_product import upsert_product
from .save_nav import save_nav
from .capture_listing import capture_listing
from .save_link_reports import save_link_reports
from .save_subcategory_redirects import save_subcategory_redirects


@@ -0,0 +1,3 @@
def _get(d, key, default=None):
v = d.get(key)
return default if v in (None, "", [], {}) else v


@@ -0,0 +1,137 @@
from typing import List, Optional, Tuple
from sqlalchemy.ext.asyncio import AsyncSession
from sqlalchemy.dialects.postgresql import insert as pg_insert
from datetime import datetime
from sqlalchemy import (
select, update
)
from urllib.parse import urlparse
import re
from models.market import (
NavTop,
NavSub,
Listing,
ListingItem,
)
from db.session import get_session
# ---------------------- Helper fns called from scraper ------------------------
async def capture_listing(
url: str,
items: List[str],
total_pages: int
) -> None:
async with get_session() as session:
await _capture_listing(
session,
url,
items,
total_pages
)
await session.commit()
async def _capture_listing(
session,
url: str,
items: List[str],
total_pages: int
) -> None:
top_id, sub_id = await _nav_ids_from_list_url(session, url)
await _save_listing(session, top_id, sub_id, items, total_pages)
async def _save_listing(session: AsyncSession, top_id: int, sub_id: Optional[int],
items: List[str], total_pages: Optional[int]) -> None:
res = await session.execute(
select(Listing).where(Listing.top_id == top_id, Listing.sub_id == sub_id, Listing.deleted_at.is_(None))
)
listing = res.scalar_one_or_none()
if not listing:
listing = Listing(top_id=top_id, sub_id=sub_id, total_pages=total_pages)
session.add(listing)
await session.flush()
else:
listing.total_pages = total_pages
# Normalize and deduplicate incoming slugs
seen: set[str] = set()
deduped: list[str] = []
for s in items or []:
if s and isinstance(s, str) and s not in seen:
seen.add(s)
deduped.append(s)
if not deduped:
return
# Fetch existing slugs from the database
res = await session.execute(
select(ListingItem.slug)
.where(ListingItem.listing_id == listing.id, ListingItem.deleted_at.is_(None))
)
existing_slugs = set(res.scalars().all())
now = datetime.utcnow()
# Slugs to delete (present in DB but not in the new data)
to_delete = existing_slugs - seen
if to_delete:
await session.execute(
update(ListingItem)
.where(
ListingItem.listing_id == listing.id,
ListingItem.slug.in_(to_delete),
ListingItem.deleted_at.is_(None)
)
.values(deleted_at=now)
)
# Slugs to insert (new ones not in DB)
to_insert = seen - existing_slugs
if to_insert:
stmt = pg_insert(ListingItem).values(
[{"listing_id": listing.id, "slug": s} for s in to_insert]
)
#.on_conflict_do_nothing(
# constraint="uq_listing_items_listing_slug"
#)
await session.execute(stmt)
async def _nav_ids_from_list_url(session: AsyncSession, list_url: str) -> Tuple[int, Optional[int]]:
parts = [x for x in (urlparse(list_url).path or "").split("/") if x]
top_slug = parts[0].lower() if parts else ""
sub_slug = None
if len(parts) >= 2:
sub_slug = parts[1]
if sub_slug.lower().endswith((".html", ".htm")):
            sub_slug = re.sub(r"(?i)\.html?$", "", sub_slug)
return await _get_nav_ids(session, top_slug, sub_slug)
async def _get_nav_ids(session: AsyncSession, top_slug: str, sub_slug: Optional[str]) -> Tuple[int, Optional[int]]:
res_top = await session.execute(select(NavTop.id).where(NavTop.slug == top_slug, NavTop.deleted_at.is_(None)))
top_id = res_top.scalar_one_or_none()
if not top_id:
raise ValueError(f"NavTop not found for slug: {top_slug}")
sub_id = None
if sub_slug:
res_sub = await session.execute(
select(NavSub.id).where(NavSub.slug == sub_slug, NavSub.top_id == top_id, NavSub.deleted_at.is_(None))
)
sub_id = res_sub.scalar_one_or_none()
if sub_id is None:
raise ValueError(f"NavSub not found for slug: {sub_slug} under top_id={top_id}")
return top_id, sub_id
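The slug diffing inside `_save_listing` reduces to plain set arithmetic; a minimal in-memory sketch (no DB, hypothetical `diff_listing` name) of the same insert/soft-delete split:

```python
def diff_listing(existing_slugs, incoming):
    # Deduplicate incoming slugs while preserving first-seen order.
    seen, deduped = set(), []
    for s in incoming or []:
        if s and isinstance(s, str) and s not in seen:
            seen.add(s)
            deduped.append(s)
    # Slugs in the scrape but not the DB get inserted;
    # slugs in the DB but missing from the scrape get soft-deleted.
    to_insert = seen - set(existing_slugs)
    to_delete = set(existing_slugs) - seen
    return to_insert, to_delete

print(diff_listing({"oats", "rice"}, ["rice", "rice", "tofu"]))
```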


@@ -0,0 +1,35 @@
from sqlalchemy.ext.asyncio import AsyncSession
from typing import Dict
from models.market import (
ProductLog,
)
from db.session import get_session
async def log_product_result(ok: bool, payload: Dict) -> None:
async with get_session() as session:
await _log_product_result(session, ok, payload)
await session.commit()
async def _log_product_result(session: AsyncSession, ok: bool, payload: Dict) -> None:
session.add(ProductLog(
ok=ok,
slug=payload.get("slug"),
href_tried=payload.get("href_tried"),
error_type=payload.get("error_type"),
error_message=payload.get("error_message"),
http_status=payload.get("http_status"),
final_url=payload.get("final_url"),
transport_error=payload.get("transport_error"),
title=payload.get("title"),
has_description_html=payload.get("has_description_html"),
has_description_short=payload.get("has_description_short"),
sections_count=payload.get("sections_count"),
images_count=payload.get("images_count"),
embedded_images_count=payload.get("embedded_images_count"),
all_images_count=payload.get("all_images_count"),
))


@@ -0,0 +1,29 @@
from typing import Dict, List
from models.market import (
LinkError,
LinkExternal,
)
from db.session import get_session
# ---------------------- Helper fns called from scraper ------------------------
async def save_link_reports(link_errors: List[Dict], link_externals: List[Dict]) -> None:
async with get_session() as session:
for e in link_errors:
session.add(LinkError(
product_slug=e.get("product"), href=e.get("href"), text=e.get("text"),
top=e.get("top"), sub=e.get("sub"), target_slug=e.get("target_slug"), type=e.get("type"),
))
for e in link_externals:
session.add(LinkExternal(
product_slug=e.get("product"), href=e.get("href"), text=e.get("text"), host=e.get("host"),
))
await session.commit()


@@ -0,0 +1,108 @@
from datetime import datetime
from sqlalchemy import (
select, tuple_
)
from typing import Dict
from models.market import (
NavTop,
NavSub,
)
from db.session import get_session
async def save_nav(nav: Dict) -> None:
async with get_session() as session:
await _save_nav(session, nav)
await session.commit()
async def _save_nav(session, nav: Dict) -> None:
print('===================SAVE NAV========================')
print(nav)
now = datetime.utcnow()
incoming_top_slugs = set()
incoming_sub_keys = set() # (top_slug, sub_slug)
# First pass: collect slugs
for label, data in (nav.get("cats") or {}).items():
top_slug = (data or {}).get("slug")
if not top_slug:
continue
incoming_top_slugs.add(top_slug)
for s in (data.get("subs") or []):
sub_slug = s.get("slug")
if sub_slug:
incoming_sub_keys.add((top_slug, sub_slug))
# Soft-delete stale NavSub entries
# This requires joining NavTop to access top_slug
subs_to_delete = await session.execute(
select(NavSub)
.join(NavTop, NavSub.top_id == NavTop.id)
.where(
NavSub.deleted_at.is_(None),
~tuple_(NavTop.slug, NavSub.slug).in_(incoming_sub_keys)
)
)
for sub in subs_to_delete.scalars():
sub.deleted_at = now
# Soft-delete stale NavTop entries
tops_to_delete = await session.execute(
select(NavTop)
.where(
NavTop.deleted_at.is_(None),
~NavTop.slug.in_(incoming_top_slugs)
)
)
for top in tops_to_delete.scalars():
top.deleted_at = now
await session.flush()
# Upsert NavTop and NavSub
for label, data in (nav.get("cats") or {}).items():
top_slug = (data or {}).get("slug")
if not top_slug:
continue
res = await session.execute(
select(NavTop).where(NavTop.slug == top_slug)
)
top = res.scalar_one_or_none()
if top:
top.label = label
top.deleted_at = None
else:
top = NavTop(label=label, slug=top_slug)
session.add(top)
await session.flush()
for s in (data.get("subs") or []):
sub_slug = s.get("slug")
if not sub_slug:
continue
sub_label = s.get("label")
sub_href = s.get("href")
res_sub = await session.execute(
select(NavSub).where(
NavSub.slug == sub_slug,
NavSub.top_id == top.id
)
)
sub = res_sub.scalar_one_or_none()
if sub:
sub.label = sub_label
sub.href = sub_href
sub.deleted_at = None
else:
session.add(NavSub(top_id=top.id, label=sub_label, slug=sub_slug, href=sub_href))
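The two-pass shape of `_save_nav` (collect incoming slugs first, then soft-delete anything not in them) can be sketched without SQLAlchemy. A minimal simulation over plain dicts; `sync_nav` and its data shapes are illustrative:

```python
from datetime import datetime

def sync_nav(nav, existing_tops):
    # existing_tops: {top_slug: {"deleted_at": ..., "subs": {sub_slug: {...}}}}
    incoming_top_slugs = set()
    incoming_sub_keys = set()  # (top_slug, sub_slug)
    for label, data in (nav.get("cats") or {}).items():
        top_slug = (data or {}).get("slug")
        if not top_slug:
            continue
        incoming_top_slugs.add(top_slug)
        for s in (data.get("subs") or []):
            if s.get("slug"):
                incoming_sub_keys.add((top_slug, s["slug"]))
    now = datetime.utcnow()
    for top_slug, top in existing_tops.items():
        if top_slug not in incoming_top_slugs:
            top["deleted_at"] = now          # soft-delete stale top
        for sub_slug, sub in top["subs"].items():
            if (top_slug, sub_slug) not in incoming_sub_keys:
                sub["deleted_at"] = now      # soft-delete stale sub
    return existing_tops

tops = {"grocery": {"deleted_at": None, "subs": {"rice": {"deleted_at": None}}}}
nav = {"cats": {"Grocery": {"slug": "grocery", "subs": []}}}
result = sync_nav(nav, tops)
print(result["grocery"]["subs"]["rice"]["deleted_at"] is not None)  # True
```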


@@ -0,0 +1,32 @@
from typing import Dict
from datetime import datetime
from sqlalchemy import (
update
)
from models.market import (
SubcategoryRedirect,
)
from db.session import get_session
# ---------------------- Helper fns called from scraper ------------------------
async def save_subcategory_redirects(mapping: Dict[str, str]) -> None:
async with get_session() as session:
await _save_subcategory_redirects(session, mapping)
await session.commit()
async def _save_subcategory_redirects(session, mapping: Dict[str, str]) -> None:
await session.execute(update(SubcategoryRedirect).where(SubcategoryRedirect.deleted_at.is_(None)).values(deleted_at=datetime.utcnow()))
for old, new in mapping.items():
session.add(SubcategoryRedirect(old_path=old, new_path=new))


@@ -0,0 +1,237 @@
from sqlalchemy.ext.asyncio import AsyncSession
from typing import Dict
from datetime import datetime
from sqlalchemy import (
func, select, update
)
from models.market import (
Product,
ProductImage,
ProductSection,
ProductLabel,
ProductSticker,
ProductAttribute,
ProductNutrition,
ProductAllergen
)
from db.session import get_session
from ._get import _get
from .log_product_result import _log_product_result
# ---------------------- Helper fns called from scraper ------------------------
async def _upsert_product(session: AsyncSession, d: Dict) -> Product:
slug = d.get("slug")
if not slug:
raise ValueError("product missing slug")
res = await session.execute(select(Product).where(Product.slug == slug, Product.deleted_at.is_(None)))
p = res.scalar_one_or_none()
if not p:
p = Product(slug=slug)
session.add(p)
p.title = _get(d, "title")
p.image = _get(d, "image")
p.description_short = _get(d, "description_short")
p.description_html = _get(d, "description_html")
p.suma_href = _get(d, "suma_href")
p.brand = _get(d, "brand")
p.rrp = _get(d, "rrp")
p.rrp_currency = _get(d, "rrp_currency")
p.rrp_raw = _get(d, "rrp_raw")
p.price_per_unit = _get(d, "price_per_unit")
p.price_per_unit_currency = _get(d, "price_per_unit_currency")
p.price_per_unit_raw = _get(d, "price_per_unit_raw")
p.special_price = _get(d, "special_price")
p.special_price_currency = _get(d, "special_price_currency")
p.special_price_raw = _get(d, "special_price_raw")
p.regular_price = _get(d, "regular_price")
p.regular_price_currency = _get(d, "regular_price_currency")
p.regular_price_raw = _get(d, "regular_price_raw")
p.case_size_count = _get(d, "case_size_count")
p.case_size_item_qty = _get(d, "case_size_item_qty")
p.case_size_item_unit = _get(d, "case_size_item_unit")
p.case_size_raw = _get(d, "case_size_raw")
p.ean = d.get("ean") or d.get("barcode") or None
p.sku = d.get("sku")
p.unit_size = d.get("unit_size")
p.pack_size = d.get("pack_size")
p.updated_at = func.now()
now = datetime.utcnow()
# ProductSection sync
existing_sections = await session.execute(select(ProductSection).where(ProductSection.product_id == p.id, ProductSection.deleted_at.is_(None)))
existing_sections_set = {(s.title, s.html) for s in existing_sections.scalars()}
new_sections_set = set()
for sec in d.get("sections") or []:
if isinstance(sec, dict) and sec.get("title") and sec.get("html"):
new_sections_set.add((sec["title"], sec["html"]))
if (sec["title"], sec["html"]) not in existing_sections_set:
session.add(ProductSection(product_id=p.id, title=sec["title"], html=sec["html"]))
for s in existing_sections_set - new_sections_set:
await session.execute(update(ProductSection).where(ProductSection.product_id == p.id, ProductSection.title == s[0], ProductSection.html == s[1], ProductSection.deleted_at.is_(None)).values(deleted_at=now))
# ProductImage sync
existing_images = await session.execute(select(ProductImage).where(ProductImage.product_id == p.id, ProductImage.deleted_at.is_(None)))
existing_images_set = {(img.url, img.kind) for img in existing_images.scalars()}
new_images_set = set()
for kind, urls in [
("gallery", d.get("images") or []),
("embedded", d.get("embedded_image_urls") or []),
("all", d.get("all_image_urls") or []),
]:
for idx, url in enumerate(urls):
if url:
new_images_set.add((url, kind))
if (url, kind) not in existing_images_set:
session.add(ProductImage(product_id=p.id, url=url, position=idx, kind=kind))
for img in existing_images_set - new_images_set:
await session.execute(update(ProductImage).where(ProductImage.product_id == p.id, ProductImage.url == img[0], ProductImage.kind == img[1], ProductImage.deleted_at.is_(None)).values(deleted_at=now))
# ProductLabel sync
existing_labels = await session.execute(select(ProductLabel).where(ProductLabel.product_id == p.id, ProductLabel.deleted_at.is_(None)))
existing_labels_set = {label.name.strip() for label in existing_labels.scalars()}
new_labels = {str(name).strip() for name in (d.get("labels") or []) if name}
for name in new_labels - existing_labels_set:
session.add(ProductLabel(product_id=p.id, name=name))
for name in existing_labels_set - new_labels:
await session.execute(update(ProductLabel).where(ProductLabel.product_id == p.id, ProductLabel.name == name, ProductLabel.deleted_at.is_(None)).values(deleted_at=now))
# ProductSticker sync
existing_stickers = await session.execute(select(ProductSticker).where(ProductSticker.product_id == p.id, ProductSticker.deleted_at.is_(None)))
existing_stickers_set = {sticker.name.strip() for sticker in existing_stickers.scalars()}
new_stickers = {str(name).strip().lower() for name in (d.get("stickers") or []) if name}
for name in new_stickers - existing_stickers_set:
session.add(ProductSticker(product_id=p.id, name=name))
for name in existing_stickers_set - new_stickers:
await session.execute(update(ProductSticker).where(ProductSticker.product_id == p.id, ProductSticker.name == name, ProductSticker.deleted_at.is_(None)).values(deleted_at=now))
# ProductAttribute sync
existing_attrs = await session.execute(select(ProductAttribute).where(ProductAttribute.product_id == p.id, ProductAttribute.deleted_at.is_(None)))
existing_attrs_set = {(a.key, a.value) for a in existing_attrs.scalars()}
new_attrs_set = set()
for src, prefix in [(d.get("info_table") or {}, "info_table"), (d.get("oe_list_price") or {}, "oe_list_price")]:
for k, v in src.items():
key = f"{prefix}/{str(k).strip()}"
val = None if v is None else str(v)
new_attrs_set.add((key, val))
if (key, val) not in existing_attrs_set:
session.add(ProductAttribute(product_id=p.id, key=key, value=val))
for key, val in existing_attrs_set - new_attrs_set:
await session.execute(update(ProductAttribute).where(ProductAttribute.product_id == p.id, ProductAttribute.key == key, ProductAttribute.value == val, ProductAttribute.deleted_at.is_(None)).values(deleted_at=now))
# ProductNutrition sync
existing_nuts = await session.execute(select(ProductNutrition).where(ProductNutrition.product_id == p.id, ProductNutrition.deleted_at.is_(None)))
existing_nuts_set = {(n.key, n.value, n.unit) for n in existing_nuts.scalars()}
new_nuts_set = set()
nutrition = d.get("nutrition") or []
if isinstance(nutrition, dict):
for k, v in nutrition.items():
key, val = str(k).strip(), str(v) if v is not None else None
new_nuts_set.add((key, val, None))
if (key, val, None) not in existing_nuts_set:
session.add(ProductNutrition(product_id=p.id, key=key, value=val, unit=None))
elif isinstance(nutrition, list):
for row in nutrition:
try:
key = str(row.get("key") or "").strip()
val = None if row.get("value") is None else str(row.get("value"))
unit = None if row.get("unit") is None else str(row.get("unit"))
if key:
new_nuts_set.add((key, val, unit))
if (key, val, unit) not in existing_nuts_set:
session.add(ProductNutrition(product_id=p.id, key=key, value=val, unit=unit))
except Exception:
continue
for key, val, unit in existing_nuts_set - new_nuts_set:
await session.execute(update(ProductNutrition).where(ProductNutrition.product_id == p.id, ProductNutrition.key == key, ProductNutrition.value == val, ProductNutrition.unit == unit, ProductNutrition.deleted_at.is_(None)).values(deleted_at=now))
# ProductAllergen sync
existing_allergens = await session.execute(select(ProductAllergen).where(ProductAllergen.product_id == p.id, ProductAllergen.deleted_at.is_(None)))
existing_allergens_set = {(a.name, a.contains) for a in existing_allergens.scalars()}
new_allergens_set = set()
for a in d.get("allergens") or []:
if isinstance(a, str):
nm, contains = a.strip(), True
elif isinstance(a, dict):
nm, contains = (a.get("name") or "").strip(), bool(a.get("contains", True))
else:
continue
if nm:
new_allergens_set.add((nm, contains))
if (nm, contains) not in existing_allergens_set:
session.add(ProductAllergen(product_id=p.id, name=nm, contains=contains))
for name, contains in existing_allergens_set - new_allergens_set:
await session.execute(update(ProductAllergen).where(ProductAllergen.product_id == p.id, ProductAllergen.name == name, ProductAllergen.contains == contains, ProductAllergen.deleted_at.is_(None)).values(deleted_at=now))
await session.flush()
return p
async def upsert_product(
slug,
href,
d,
):
async with get_session() as session:
try:
await _upsert_product(session, d)
await _log_product_result(session, ok=True, payload={
"slug": slug,
"href_tried": href,
"title": d.get("title"),
"has_description_html": bool(d.get("description_html")),
"has_description_short": bool(d.get("description_short")),
"sections_count": len(d.get("sections") or []),
                "images_count": len(d.get("images") or []),
                "embedded_images_count": len(d.get("embedded_image_urls") or []),
                "all_images_count": len(d.get("all_image_urls") or []),
})
except Exception as e:
print(f"[ERROR] Failed to upsert product '{d.get('slug')}'")
            print(f"    Title: {d.get('title')}")
print(f" URL: {d.get('suma_href')}")
print(f" Error type: {type(e).__name__}")
print(f" Error message: {str(e)}")
import traceback
traceback.print_exc()
await _log_product_result(session, ok=False, payload={
"slug": d.get("slug"),
"href_tried": d.get("suma_href"),
"error_type": type(e).__name__,
"error_message": str(e),
"title": d.get("title"),
})
raise
await session.commit()


@@ -0,0 +1 @@


@@ -0,0 +1,13 @@
# Auto-import all extractor modules so they register themselves.
from .title import ex_title # noqa: F401
from .images import ex_images # noqa: F401
from .short_description import ex_short_description # noqa: F401
from .description_sections import ex_description_sections # noqa: F401
from .nutrition_ex import ex_nutrition # noqa: F401
from .stickers import ex_stickers # noqa: F401
from .labels import ex_labels # noqa: F401
from .info_table import ex_info_table # noqa: F401
from .oe_list_price import ex_oe_list_price # noqa: F401
from .regular_price_fallback import ex_regular_price_fallback # noqa: F401
from .breadcrumbs import ex_breadcrumbs # noqa: F401


@@ -0,0 +1,68 @@
from __future__ import annotations
from typing import Dict, List, Union
from urllib.parse import urlparse
from bs4 import BeautifulSoup
from utils import normalize_text
from ..registry import extractor
@extractor
def ex_breadcrumbs(soup: BeautifulSoup, url: str) -> Dict:
"""
Parse breadcrumbs to identify top and sub categories.
"""
bc_ul = (soup.select_one(".breadcrumbs ul.items")
or soup.select_one("nav.breadcrumbs ul.items")
or soup.select_one("ul.items"))
if not bc_ul:
return {}
crumbs = []
for li in bc_ul.select("li.item"):
a = li.find("a")
if a:
title = normalize_text(a.get("title") or a.get_text())
href = a.get("href")
else:
title = normalize_text(li.get_text())
href = None
slug = None
if href:
try:
p = urlparse(href)
path = (p.path or "").strip("/")
slug = path.split("/")[-1] if path else None
except Exception:
slug = None
if slug:
crumbs.append({"title": title or None, "href": href or None, "slug": slug})
category_links = [c for c in crumbs if c.get("href")]
top = None
sub = None
for c in category_links:
t = (c.get("title") or "").lower()
s = (c.get("slug") or "").lower()
if t == "home" or s in ("", "home"):
continue
if top is None:
top = c
continue
if sub is None:
sub = c
break
out: Dict[str, Union[str, List[Dict[str, str]]]] = {
"category_breadcrumbs": crumbs
}
if top:
out["category_top_title"] = top.get("title")
out["category_top_href"] = top.get("href")
out["category_top_slug"] = top.get("slug")
if sub:
out["category_sub_title"] = sub.get("title")
out["category_sub_href"] = sub.get("href")
out["category_sub_slug"] = sub.get("slug")
if top and sub:
out["category_path"] = f"{(top.get('slug') or '').strip()}/{(sub.get('slug') or '').strip()}"
return out
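The crumb slug derivation above is simply the last non-empty path segment of the anchor's href; pulled out as a stdlib-only check (hypothetical `slug_from_href` name, example URLs illustrative):

```python
from urllib.parse import urlparse

def slug_from_href(href):
    # Last non-empty path segment, or None for a bare host / root path.
    path = (urlparse(href).path or "").strip("/")
    return path.split("/")[-1] if path else None

print(slug_from_href("https://example.com/grocery/rice-pasta.html"))  # rice-pasta.html
print(slug_from_href("https://example.com/"))                         # None
```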


@@ -0,0 +1,43 @@
from __future__ import annotations
from typing import Dict, List
from bs4 import BeautifulSoup
from utils import normalize_text
from ...html_utils import absolutize_fragment
from ..registry import extractor
from ..helpers.desc import (
split_description_container, find_description_container,
pair_title_content_from_magento_tabs, scan_headings_for_sections,
additional_attributes_table,
)
from ..helpers.text import clean_title, is_blacklisted_heading
@extractor
def ex_description_sections(soup: BeautifulSoup, url: str) -> Dict:
description_html = None
sections: List[Dict] = []
desc_el = find_description_container(soup)
if desc_el:
open_html, sections_from_desc = split_description_container(desc_el)
description_html = open_html or None
sections.extend(sections_from_desc)
existing = {s["title"].lower() for s in sections}
for t, html_fragment in (pair_title_content_from_magento_tabs(soup) or scan_headings_for_sections(soup)):
low = t.lower()
if "product description" in low or low == "description" or "details" in low:
if not description_html and html_fragment:
description_html = absolutize_fragment(html_fragment)
continue
if t.lower() not in existing and normalize_text(BeautifulSoup(html_fragment, "lxml").get_text()):
if not is_blacklisted_heading(t):
sections.append({"title": clean_title(t), "html": absolutize_fragment(html_fragment)})
existing.add(t.lower())
addl = additional_attributes_table(soup)
if addl and "additional information" not in existing and not is_blacklisted_heading("additional information"):
sections.append({"title": "Additional Information", "html": addl})
out = {"sections": sections}
if description_html:
out["description_html"] = description_html
return out


@@ -0,0 +1,89 @@
from __future__ import annotations
import json, re
from typing import Dict, List
from bs4 import BeautifulSoup
from ..registry import extractor
from ..helpers.html import abs_url, collect_img_candidates, dedup_by_filename
@extractor
def ex_images(soup: BeautifulSoup, url: str) -> Dict:
images: List[str] = []
debug = False # set True while debugging
# 1) Magento init script (gallery)
scripts = soup.find_all("script", attrs={"type": "text/x-magento-init"})
if debug: print(f"[ex_images] x-magento-init scripts: {len(scripts)}")
for script in scripts:
# Use raw string as-is; no stripping/collapsing
text = script.string or script.get_text() or ""
if "mage/gallery/gallery" not in text:
continue
# Correct (not over-escaped) patterns:
m = re.search(r'"data"\s*:\s*(\[[\s\S]*?\])', text)
if not m:
if debug: print("[ex_images] 'data' array not found in gallery block")
continue
arr_txt = m.group(1)
added = False
try:
data = json.loads(arr_txt)
for entry in data:
u = abs_url(entry.get("full")) or abs_url(entry.get("img"))
if u:
images.append(u); added = True
except Exception as e:
if debug: print(f"[ex_images] json.loads failed: {e!r}; trying regex fallback")
# Fallback to simple key extraction
fulls = re.findall(r'"full"\s*:\s*"([^"]+)"', arr_txt)
imgs = re.findall(r'"img"\s*:\s*"([^"]+)"', arr_txt) if not fulls else []
for u in (fulls or imgs):
u = abs_url(u)
if u:
images.append(u); added = True
if added:
break # got what we need from the gallery block
# 2) JSON-LD fallback
if not images:
for script in soup.find_all("script", attrs={"type": "application/ld+json"}):
raw = script.string or script.get_text() or ""
try:
data = json.loads(raw)
except Exception:
continue
def add_from(val):
if isinstance(val, str):
u = abs_url(val); u and images.append(u)
elif isinstance(val, list):
for v in val:
if isinstance(v, str):
u = abs_url(v); u and images.append(u)
elif isinstance(v, dict) and "url" in v:
u = abs_url(v["url"]); u and images.append(u)
elif isinstance(val, dict) and "url" in val:
u = abs_url(val["url"]); u and images.append(u)
if isinstance(data, dict) and "image" in data:
add_from(data["image"])
if isinstance(data, list):
for item in data:
if isinstance(item, dict) and "image" in item:
add_from(item["image"])
# 3) Generic DOM scan fallback
if not images:
# consider broadening selectors if needed, e.g. '.fotorama__img'
for el in soup.select(".product.media img, .gallery-placeholder img, .fotorama__stage img"):
for cand in collect_img_candidates(el):
u = abs_url(cand)
if u:
images.append(u)
images = dedup_by_filename(images)
if debug: print(f"[ex_images] found images: {images}")
return {"images": images, "image": images[0] if images else None}
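Step 1's gallery parse can be verified against a synthetic x-magento-init payload. The markup below is illustrative, not taken from a live page; only the `"data"` regex and full/img preference are exercised:

```python
import json
import re

script_text = '''
{"[data-gallery-role=gallery-placeholder]": {"mage/gallery/gallery": {
    "data": [{"full": "https://cdn.example.com/a.jpg", "img": "https://cdn.example.com/a_small.jpg"},
             {"img": "https://cdn.example.com/b.jpg"}]}}}
'''

m = re.search(r'"data"\s*:\s*(\[[\s\S]*?\])', script_text)
images = []
if m:
    for entry in json.loads(m.group(1)):
        u = entry.get("full") or entry.get("img")  # prefer the full-size URL
        if u:
            images.append(u)
print(images)
```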


@@ -0,0 +1,76 @@
from __future__ import annotations
from typing import Dict, Union
from bs4 import BeautifulSoup
from utils import normalize_text
from ..registry import extractor
from ..helpers.price import parse_price, parse_case_size
@extractor
def ex_info_table(soup: BeautifulSoup, url: str) -> Dict:
"""
Extracts:
<div class="product-page-info-table"> ... rows of label/content ... </div>
Produces:
info_table (raw map), brand, rrp[_raw|_currency], price_per_unit[_raw|_currency],
case_size_* fields
"""
    container = soup.select_one(".product-page-info-table")
if not container:
return {}
rows_parent = container.select_one(".product-page-info-table-rows") or container
rows = rows_parent.select(".product-page-info-table-row") or []
if not rows:
return {}
raw_map: Dict[str, str] = {}
for r in rows:
lab_el = r.select_one(".product-page-info-table__label")
val_el = r.select_one(".product-page-info-table__content")
if not lab_el or not val_el:
continue
label = normalize_text(lab_el.get_text())
value = normalize_text(val_el.get_text())
if label:
raw_map[label] = value
out: Dict[str, Union[str, float, int, Dict]] = {"info_table": raw_map}
# Brand
brand = raw_map.get("Brand") or raw_map.get("Brand Name") or None
if brand:
out["brand"] = brand
# RRP
rrp_val, rrp_cur, rrp_raw = parse_price(raw_map.get("RRP", ""))
if rrp_raw and (rrp_val is not None or rrp_cur is not None):
out["rrp_raw"] = rrp_raw
if rrp_val is not None:
out["rrp"] = rrp_val
if rrp_cur:
out["rrp_currency"] = rrp_cur
# Price Per Unit
ppu_val, ppu_cur, ppu_raw = parse_price(
raw_map.get("Price Per Unit", "") or raw_map.get("Unit Price", "")
)
if ppu_raw and (ppu_val is not None or ppu_cur is not None):
out["price_per_unit_raw"] = ppu_raw
if ppu_val is not None:
out["price_per_unit"] = ppu_val
if ppu_cur:
out["price_per_unit_currency"] = ppu_cur
# Case Size
cs_text = raw_map.get("Case Size", "") or raw_map.get("Pack Size", "")
cs_count, cs_item_qty, cs_item_unit, cs_raw = parse_case_size(cs_text)
if cs_raw:
out["case_size_raw"] = cs_raw
if cs_count is not None:
out["case_size_count"] = cs_count
if cs_item_qty is not None:
out["case_size_item_qty"] = cs_item_qty
if cs_item_unit:
out["case_size_item_unit"] = cs_item_unit
return out
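`parse_price` itself lives in `..helpers.price` and is not part of this diff. A plausible minimal stand-in with the same `(value, currency, raw)` tuple contract, for illustration only (the symbol-to-currency mapping is an assumption):

```python
import re
from typing import Optional, Tuple

def parse_price(text: str) -> Tuple[Optional[float], Optional[str], Optional[str]]:
    # Hypothetical re-implementation; the real helper may handle more formats.
    raw = (text or "").strip()
    if not raw:
        return None, None, None
    m = re.search(r"(£|\$|€)?\s*(\d+(?:\.\d+)?)", raw)
    if not m:
        return None, None, raw
    currency = {"£": "GBP", "$": "USD", "€": "EUR"}.get(m.group(1))
    return float(m.group(2)), currency, raw

print(parse_price("£1.99"))  # (1.99, 'GBP', '£1.99')
```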


@@ -0,0 +1,41 @@
from __future__ import annotations
from typing import Dict, List
from bs4 import BeautifulSoup
from utils import normalize_text
from ..registry import extractor
@extractor
def ex_labels(soup: BeautifulSoup, url: str) -> Dict:
"""
From:
<ul class="cdz-product-labels">
<li class="label-item new"><div class="label-content">NEW</div></li>
</ul>
Returns "labels": lower-cased union of class hints and visible text.
"""
root = soup.select_one("ul.cdz-product-labels")
if not root:
return {}
items: List[str] = []
texts: List[str] = []
for li in root.select("li.label-item"):
for c in (li.get("class") or []):
c = (c or "").strip()
if c and c.lower() != "label-item" and c not in items:
items.append(c)
txt = normalize_text(li.get_text())
if txt and txt not in texts:
texts.append(txt)
if not items and not texts:
return {}
union = []
seen = set()
for s in items + [t.lower() for t in texts]:
key = (s or "").strip().lower()
if key and key not in seen:
seen.add(key)
union.append(key)
return {"labels": union}
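The closing union is an order-preserving, case-insensitive dedupe of class hints plus visible texts; extracted as a standalone sketch (hypothetical `dedupe_lower` name):

```python
def dedupe_lower(items):
    # Lower-case, strip, and keep the first occurrence of each value.
    seen, union = set(), []
    for s in items:
        key = (s or "").strip().lower()
        if key and key not in seen:
            seen.add(key)
            union.append(key)
    return union

print(dedupe_lower(["new", "NEW ", "sale"]))  # ['new', 'sale']
```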


@@ -0,0 +1,129 @@
from __future__ import annotations
from typing import Dict, List, Optional, Tuple
import re
from bs4 import BeautifulSoup
from utils import normalize_text
from ..registry import extractor
from ..helpers.desc import (
split_description_container, find_description_container,
pair_title_content_from_magento_tabs, scan_headings_for_sections,
)
# ----- value/unit parser ------------------------------------------------------
_NUM_UNIT_RE = re.compile(
r"""
^\s*
(?P<num>[-+]?\d{1,3}(?:[.,]\d{3})*(?:[.,]\d+)?|\d+(?:[.,]\d+)?)
\s*
(?P<unit>[a-zA-Z%µ/]+)?
\s*$
""",
re.X,
)
def _parse_value_unit(s: str) -> Tuple[Optional[str], Optional[str]]:
if not s:
return None, None
s = re.sub(r"\s+", " ", s.strip())
m = _NUM_UNIT_RE.match(s)
if not m:
return None, None
num = (m.group("num") or "").replace(",", "")
unit = m.group("unit") or None
if unit:
u = unit.lower()
if u in {"kcal", "kcal.", "kcalories", "kcalorie"}:
unit = "kcal"
elif u in {"kj", "kj.", "kilojoule", "kilojoules"}:
unit = "kJ"
return (num or None, unit)
# ----- section finder ---------------------------------------------------------
def _find_nutrition_section_html(soup: BeautifulSoup) -> Optional[str]:
"""
Return the HTML for the section whose title matches 'Nutritional Information'.
Searches the same places as the description extractor: Magento tabs,
the split description container, then a generic heading scan.
"""
# 1) Magento tabs
for t, html in (pair_title_content_from_magento_tabs(soup) or []):
if not t or not html:
continue
title = normalize_text(t).rstrip(":").lower()
if "nutritional information" in title:
return html
# 2) Description container split into sections
desc_el = find_description_container(soup)
if desc_el:
_open_html, sections = split_description_container(desc_el)
for sec in sections or []:
title = normalize_text((sec.get("title") or "")).rstrip(":").lower()
if "nutritional information" in title:
return sec.get("html") or ""
# 3) Fallback: generic heading scan
for t, html in (scan_headings_for_sections(soup) or []):
if not t or not html:
continue
title = normalize_text(t).rstrip(":").lower()
if "nutritional information" in title:
return html
return None
# ----- table parser -----------------------------------------------------------
def _extract_rows_from_table(root: BeautifulSoup) -> List[Dict[str, str]]:
out: List[Dict[str, str]] = []
table = root.select_one("table")
if not table:
return out
for tr in table.select("tr"):
th = tr.find("th")
tds = tr.find_all("td")
if th and tds:
key = normalize_text(th.get_text(" ").strip())
val_raw = normalize_text(tds[0].get_text(" ").strip())
elif len(tds) >= 2:
key = normalize_text(tds[0].get_text(" ").strip())
val_raw = normalize_text(tds[1].get_text(" ").strip())
else:
continue
if not key or not val_raw:
continue
value, unit = _parse_value_unit(val_raw)
if value is None: # keep raw if not parseable
value, unit = val_raw, None
out.append({"key": key, "value": value, "unit": unit})
# Deduplicate while preserving order
seen = set()
dedup: List[Dict[str, str]] = []
for r in out:
t = (r["key"], r.get("value"), r.get("unit"))
if t in seen:
continue
seen.add(t)
dedup.append(r)
return dedup
# ----- extractor --------------------------------------------------------------
@extractor
def ex_nutrition(soup: BeautifulSoup, url: str) -> Dict:
"""
Extract nutrition ONLY from the section titled 'Nutritional Information'.
Returns: {"nutrition": [{"key": "...", "value": "...", "unit": "..."}]}
"""
section_html = _find_nutrition_section_html(soup)
if not section_html:
return {"nutrition": []}
section_soup = BeautifulSoup(section_html, "lxml")
rows = _extract_rows_from_table(section_soup)
return {"nutrition": rows}
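As a quick illustration of what `_parse_value_unit` accepts, here is a condensed, standalone copy of the parser (regex and normalisation reproduced from above; the sample inputs are made up):

```python
import re
from typing import Optional, Tuple

_NUM_UNIT_RE = re.compile(
    r"""^\s*
    (?P<num>[-+]?\d{1,3}(?:[.,]\d{3})*(?:[.,]\d+)?|\d+(?:[.,]\d+)?)
    \s*(?P<unit>[a-zA-Z%µ/]+)?\s*$""",
    re.X,
)

def parse_value_unit(s: str) -> Tuple[Optional[str], Optional[str]]:
    """Split '1,046 kJ' into ('1046', 'kJ'); unparseable input gives (None, None)."""
    if not s:
        return None, None
    m = _NUM_UNIT_RE.match(re.sub(r"\s+", " ", s.strip()))
    if not m:
        return None, None
    num = (m.group("num") or "").replace(",", "")  # strip thousands separators
    unit = m.group("unit") or None
    if unit:
        u = unit.lower()
        if u in {"kcal", "kcal.", "kcalories", "kcalorie"}:
            unit = "kcal"
        elif u in {"kj", "kj.", "kilojoule", "kilojoules"}:
            unit = "kJ"  # normalise casing
    return (num or None, unit)

print(parse_value_unit("1,046 kJ"))  # ('1046', 'kJ')
print(parse_value_unit("22.5 g"))    # ('22.5', 'g')
print(parse_value_unit("trace"))     # (None, None)
```

Rows like "trace" fall through to the raw-value branch in `_extract_rows_from_table`, so they are kept as text rather than dropped.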

View File

@@ -0,0 +1,56 @@
from __future__ import annotations
from typing import Dict, Union
from bs4 import BeautifulSoup
from ..registry import extractor
from ..helpers.price import parse_price
@extractor
def ex_oe_list_price(soup: BeautifulSoup, url: str) -> Dict:
"""
Extract Magento "oe-list-price" block:
<div class="oe-list-price">
<div class="rrp-price"><label>Regular Price: </label><span class="price">£30.50</span></div>
<div class="oe-final-price"><label>Special Price: </label><span>£23.63</span></div>
</div>
Produces:
oe_list_price: { rrp_raw, rrp, rrp_currency, special_raw, special, special_currency }
Also promotes special_* to top-level (special_price_*) if available.
"""
box = soup.select_one(".oe-list-price")
if not box:
return {}
out: Dict[str, Union[str, float, dict]] = {}
oe: Dict[str, Union[str, float]] = {}
# RRP inside oe-list-price (if present)
rrp = box.select_one(".rrp-price")
if rrp:
txt = (rrp.select_one("span.price") or rrp.select_one("span") or rrp).get_text(strip=True)
val, cur, raw = parse_price(txt)
if raw:
oe["rrp_raw"] = raw
if val is not None:
oe["rrp"] = val
if cur:
oe["rrp_currency"] = cur
# Special Price inside oe-list-price
sp = box.select_one(".oe-final-price, .special-price, .final-price")
if sp:
txt = (sp.select_one("span.price") or sp.select_one("span") or sp).get_text(strip=True)
val, cur, raw = parse_price(txt)
if raw:
oe["special_raw"] = raw
if val is not None:
oe["special"] = val
out["special_price"] = val
if cur:
oe["special_currency"] = cur
out["special_price_currency"] = cur
if raw:
out["special_price_raw"] = raw
if oe:
out["oe_list_price"] = oe
return out

View File

@@ -0,0 +1,33 @@
from __future__ import annotations
from typing import Dict, Union
from bs4 import BeautifulSoup
from ..registry import extractor
from ..helpers.price import parse_price
@extractor
def ex_regular_price_fallback(soup: BeautifulSoup, url: str) -> Dict:
"""
Fallback extractor for legacy 'Regular Price' blocks outside oe-list-price:
<div class="rrp-price"><label>Regular Price: </label><span class="price">£16.55</span></div>
"""
rrp = soup.select_one("div.rrp-price")
if not rrp:
return {}
span = rrp.select_one("span.price")
price_text = span.get_text(strip=True) if span else rrp.get_text(" ", strip=True)
value, currency, raw = parse_price(price_text or "")
out: Dict[str, Union[str, float]] = {}
if raw:
out["regular_price_raw"] = raw
if value is not None:
out["regular_price"] = value
if currency:
out["regular_price_currency"] = currency
if value is not None:
out.setdefault("rrp", value)
if currency:
out.setdefault("rrp_currency", currency)
if raw:
out.setdefault("rrp_raw", raw)
return out

View File

@@ -0,0 +1,19 @@
from __future__ import annotations
from typing import Dict
from bs4 import BeautifulSoup
from utils import normalize_text
from ..registry import extractor
@extractor
def ex_short_description(soup: BeautifulSoup, url: str) -> Dict:
desc_short = None
for sel in [".product.attribute.description .value", ".product.attribute.overview .value",
"meta[name='description']", "meta[property='og:description']"]:
el = soup.select_one(sel)
if not el:
continue
desc_short = normalize_text(el.get_text() if el.name != "meta" else (el.get("content") or ""))
if desc_short:
break
return {"description_short": desc_short}

View File

@@ -0,0 +1,30 @@
from __future__ import annotations
from typing import Dict, List
from bs4 import BeautifulSoup
from ..registry import extractor
@extractor
def ex_stickers(soup: BeautifulSoup, url: str) -> Dict:
"""
<div class="stickers">
<span class="sticker xxx"></span>
...
</div>
"""
root = soup.select_one("div.stickers")
if not root:
return {"stickers": []}
stickers: List[str] = []
seen = set()
for sp in root.select("span.sticker"):
classes = sp.get("class") or []
extras = [c.strip() for c in classes if c and c.lower() != "sticker"]
data_name = (sp.get("data-sticker") or "").strip()
if data_name:
extras.append(data_name)
for x in extras:
if x and x not in seen:
seen.add(x)
stickers.append(x)
return {"stickers": stickers}

View File

@@ -0,0 +1,17 @@
from __future__ import annotations
from typing import Dict
from bs4 import BeautifulSoup
from utils import normalize_text
from ..registry import extractor
@extractor
def ex_title(soup: BeautifulSoup, url: str) -> Dict:
title = None
for sel in ["h1.page-title span", "h1.page-title", "h1.product-name", "meta[property='og:title']"]:
el = soup.select_one(sel)
if el:
title = normalize_text(el.get_text() if el.name != "meta" else (el.get("content") or ""))
if title:
break
return {"title": title or "Product"}

View File

@@ -0,0 +1,165 @@
from __future__ import annotations
from typing import Dict, List, Optional, Tuple
from bs4 import BeautifulSoup, NavigableString, Tag
from utils import normalize_text
from ...html_utils import absolutize_fragment
from .text import clean_title, is_blacklisted_heading
from config import config
def split_description_container(desc_el: Tag) -> Tuple[str, List[Dict]]:
"""
Extract sections from accordion blocks within the description container.
Looks for headings with class 'accordion-title' and pairs each with its
next element-sibling having class 'accordion-details'. Returns:
- open_html: the remaining description HTML with those accordion blocks removed
- sections: [{"title": ..., "html": ...}, ...]
"""
# Work on an isolated copy to avoid mutating the original DOM
frag = BeautifulSoup(desc_el.decode_contents(), "lxml")
# Collect candidate (heading, details) pairs without mutating during iteration
pairs: List[Tuple[Tag, Tag]] = []
for h in frag.select("#accordion .accordion-title, .accordion .accordion-title, h5.accordion-title, .accordion-title"):
if not isinstance(h, Tag):
continue
title = clean_title((h.get_text() or "").strip())
if not title:
continue
# Walk forward siblings until we hit an element; accept the first with 'accordion-details'
sib = h.next_sibling
details: Optional[Tag] = None
while sib is not None:
if isinstance(sib, Tag):
classes = sib.get("class") or []
if "accordion-details" in classes:
details = sib
break
sib = sib.next_sibling
if details is not None:
pairs.append((h, details))
sections: List[Dict] = []
# Extract sections, then remove nodes from frag
for h, details in pairs:
# Pull details HTML
html = details.decode_contents()
# Only keep non-empty (textual) content
if normalize_text(BeautifulSoup(html, "lxml").get_text()):
sections.append({
"title": clean_title(h.get_text() or ""),
"html": absolutize_fragment(html),
})
# Remove the matched nodes from the fragment copy
details.decompose()
h.decompose()
# Whatever remains is the open description html
open_html = absolutize_fragment(str(frag)) if frag else ""
return open_html, sections
def pair_title_content_from_magento_tabs(soup: BeautifulSoup):
out = []
container = soup.select_one(".product.info.detailed .product.data.items") or soup.select_one(".product.data.items")
if not container:
return out
titles = container.select(".data.item.title")
for t in titles:
title = normalize_text(t.get_text())
if not title:
continue
content_id = t.get("aria-controls") or t.get("data-target")
content = soup.select_one(f"#{content_id}") if content_id else None
if content is None:
sib = t.find_next_sibling(
lambda x: isinstance(x, Tag) and "data" in x.get("class", []) and "item" in x.get("class", []) and "content" in x.get("class", [])
)
content = sib
if content:
html = content.decode_contents()
if not is_blacklisted_heading(title):
out.append((title, absolutize_fragment(html)))
return out
def scan_headings_for_sections(soup: BeautifulSoup):
out = []
container = (
soup.select_one(".product.info.detailed")
or soup.select_one(".product-info-main")
or soup.select_one(".page-main")
or soup
)
heads = container.select("h2, h3, h4, h5, h6")
section_titles = (config().get("section-titles") or [])
for h in heads:
title = clean_title(h.get_text() or "")
if not title:
continue
low = title.lower()
if not any(k in low for k in section_titles + ["product description", "description", "details"]):
continue
parts: List[str] = []
for sib in h.next_siblings:
if isinstance(sib, NavigableString):
parts.append(str(sib))
continue
if isinstance(sib, Tag) and sib.name in ("h2", "h3", "h4", "h5", "h6"):
break
if isinstance(sib, Tag):
parts.append(str(sib))
html = absolutize_fragment("".join(parts).strip())
if html and not is_blacklisted_heading(title):
out.append((title, html))
return out
def additional_attributes_table(soup: BeautifulSoup) -> Optional[str]:
table = soup.select_one(".additional-attributes, table.additional-attributes, .product.attribute.additional table")
if not table:
return None
try:
rows = []
for tr in table.select("tr"):
th = tr.find("th") or tr.find("td")
tds = tr.find_all("td")
key = normalize_text(th.get_text()) if th else None
val = normalize_text(tds[-1].get_text()) if tds else None
if key and val:
rows.append((key, val))
if not rows:
return None
items = "\n".join(
[
f"""<div class='grid grid-cols-3 gap-2 py-1 border-b'>
<div class='col-span-1 font-medium'>{key}</div>
<div class='col-span-2 text-stone-700'>{val}</div>
</div>"""
for key, val in rows
]
)
return f"<div class='rounded-lg border bg-white'>{items}</div>"
except Exception:
return None
def find_description_container(soup: BeautifulSoup) -> Optional[Tag]:
for sel in ["#description", "#tab-description", ".product.attribute.description .value",
".product.attribute.overview .value", ".product.info.detailed .value"]:
el = soup.select_one(sel)
if el and normalize_text(el.get_text()):
return el
for h in soup.select("h2, h3, h4, h5, h6"):
txt = normalize_text(h.get_text()).lower()
if txt.startswith("product description") or txt == "description":
wrapper = soup.new_tag("div")
for sib in list(h.next_siblings):  # materialize first: appending a Tag moves it and breaks live sibling iteration
if isinstance(sib, Tag) and sib.name in ("h2", "h3", "h4", "h5", "h6"):
break
wrapper.append(sib if isinstance(sib, Tag) else NavigableString(str(sib)))
if normalize_text(wrapper.get_text()):
return wrapper
return None

View File

@@ -0,0 +1,53 @@
from __future__ import annotations
from typing import List, Optional
from urllib.parse import urljoin, urlparse
from config import config
def first_from_srcset(val: str) -> Optional[str]:
if not val:
return None
first = val.split(",")[0].strip()
parts = first.split()
return parts[0] if parts else first
def abs_url(u: Optional[str]) -> Optional[str]:
if not u:
return None
return urljoin(config()["base_url"], u) if isinstance(u, str) and u.startswith("/") else u
def collect_img_candidates(el) -> List[str]:
urls: List[str] = []
if not el:
return urls
attrs = ["src", "data-src", "data-original", "data-zoom-image", "data-thumb", "content", "href"]
for a in attrs:
v = el.get(a)
if v:
urls.append(v)
for a in ["srcset", "data-srcset"]:
v = el.get(a)
if v:
first = first_from_srcset(v)
if first:
urls.append(first)
return urls
def _filename_key(u: str) -> str:
p = urlparse(u)
path = p.path or ""
if path.endswith("/"):
path = path[:-1]
last = path.split("/")[-1]
return f"{p.netloc}:{last}".lower()
def dedup_by_filename(urls: List[str]) -> List[str]:
seen = set()
out: List[str] = []
for u in urls:
k = _filename_key(u)
if k in seen:
continue
seen.add(k)
out.append(u)
return out
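The filename-keyed de-dup above is what collapses the same image served from different cache-size paths. A standalone sketch of the same idea (the URLs are made up):

```python
from typing import List
from urllib.parse import urlparse

def filename_key(u: str) -> str:
    # host:filename, lower-cased — mirrors _filename_key above
    p = urlparse(u)
    path = p.path or ""
    if path.endswith("/"):
        path = path[:-1]
    return f"{p.netloc}:{path.split('/')[-1]}".lower()

def dedup_by_filename(urls: List[str]) -> List[str]:
    seen, out = set(), []
    for u in urls:
        k = filename_key(u)
        if k not in seen:
            seen.add(k)
            out.append(u)  # first occurrence wins, order preserved
    return out

urls = [
    "https://cdn.example.com/cache/100x100/pic.jpg",
    "https://cdn.example.com/cache/800x800/pic.jpg",  # same filename -> dropped
    "https://cdn.example.com/media/other.jpg",
]
print(dedup_by_filename(urls))  # first and third URL survive
```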

View File

@@ -0,0 +1,42 @@
from __future__ import annotations
import re
from typing import Optional, Tuple
def parse_price(text: str) -> Tuple[Optional[float], Optional[str], str]:
"""
Return (value, currency, raw) from a price-like string.
Supports symbols £, €, $; strips thousands commas.
"""
raw = (text or "").strip()
m = re.search(r'([£€$])?\s*([0-9][0-9.,]*)', raw)
if not m:
return None, None, raw
sym = m.group(1) or ""
num = m.group(2).replace(",", "")
try:
value = float(num)
except ValueError:
return None, None, raw
currency = {"£": "GBP", "€": "EUR", "$": "USD"}.get(sym, None)
return value, currency, raw
def parse_case_size(text: str) -> Tuple[Optional[int], Optional[float], Optional[str], str]:
"""
Parse strings like "6 x 500g", "12x1L", "24 × 330 ml"
Returns (count, item_qty, item_unit, raw)
"""
raw = (text or "").strip()
if not raw:
return None, None, None, raw
t = re.sub(r"[×Xx]\s*", " x ", raw)
m = re.search(r"(\d+)\s*x\s*([0-9]*\.?[0-9]+)\s*([a-zA-Z]+)", t)
if not m:
return None, None, None, raw
count = int(m.group(1))
try:
item_qty = float(m.group(2))
except ValueError:
item_qty = None
unit = m.group(3)
return count, item_qty, unit, raw
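A minimal self-contained check of the two parsers (condensed copies for illustration; the euro key in the currency map is the literal € symbol):

```python
import re
from typing import Optional, Tuple

def parse_price(text: str) -> Tuple[Optional[float], Optional[str], str]:
    # Condensed copy of parse_price above.
    raw = (text or "").strip()
    m = re.search(r"([£€$])?\s*([0-9][0-9.,]*)", raw)
    if not m:
        return None, None, raw
    try:
        value = float(m.group(2).replace(",", ""))  # drop thousands commas
    except ValueError:
        return None, None, raw
    currency = {"£": "GBP", "€": "EUR", "$": "USD"}.get(m.group(1) or "")
    return value, currency, raw

def parse_case_size(text: str):
    # Condensed copy of parse_case_size above ("6 x 500g", "24 × 330 ml", ...).
    raw = (text or "").strip()
    m = re.search(r"(\d+)\s*x\s*([0-9]*\.?[0-9]+)\s*([a-zA-Z]+)",
                  re.sub(r"[×Xx]\s*", " x ", raw))
    if not m:
        return None, None, None, raw
    return int(m.group(1)), float(m.group(2)), m.group(3), raw

print(parse_price("£1,234.56"))        # (1234.56, 'GBP', '£1,234.56')
print(parse_case_size("24 × 330 ml"))  # (24, 330.0, 'ml', '24 × 330 ml')
```

Note that a price with no symbol parses with `currency=None`, which is why callers above only set `*_currency` keys when a currency was found.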

View File

@@ -0,0 +1,16 @@
from __future__ import annotations
import re
from utils import normalize_text
from config import config
def clean_title(t: str) -> str:
t = normalize_text(t)
t = re.sub(r":\s*$", "", t)
return t
def is_blacklisted_heading(title: str) -> bool:
"""Return True if heading should be skipped based on config blacklist."""
bl = (config().get("blacklist") or {}).get("product-details") or []
low = (title or "").strip().lower()
return any(low == (s or "").strip().lower() for s in bl)

View File

@@ -0,0 +1,48 @@
from __future__ import annotations
from typing import Dict, Tuple, Union
from utils import soup_of
from ..http_client import fetch
from ..html_utils import absolutize_fragment
from suma_browser.app.bp.browse.services.slugs import product_slug_from_href
from .registry import REGISTRY, merge_missing
from . import extractors as _auto_register # noqa: F401 (import-time side effects)
async def scrape_product_detail(product_url: str, include_html: bool = False) -> Union[dict, Tuple[dict, str]]:
"""
Returns a dict with fields (subset):
title, images, image, description_short, description_html, sections,
slug, suma_href, stickers, labels, info_table fields, oe_list_price, prices,
breadcrumbs-derived category_* fields.
If include_html=True, returns (data, html).
"""
html = await fetch(product_url)
data: Dict[str, Union[str, float, int, list, dict, None]] = {
"suma_href": product_url,
"slug": product_slug_from_href(product_url),
}
# Run all extractors; parse a fresh soup per extractor so DOM mutations in one cannot leak into the next
for fn in REGISTRY:
try:
soup = soup_of(html)
piece = fn(soup, product_url) or {}
except Exception:
# Tolerate site drift
continue
merge_missing(data, piece)
# If we found short description but not description_html, echo it
if not data.get("description_html") and data.get("description_short"):
data["description_html"] = absolutize_fragment(f"<p>{data['description_short']}</p>")
# Ensure "image" mirrors first of images if not set
if not data.get("image"):
imgs = data.get("images") or []
if isinstance(imgs, list) and imgs:
data["image"] = imgs[0]
if include_html:
return data, html
return data

View File

@@ -0,0 +1,4 @@
from __future__ import annotations
# Thin wrapper to keep import path stable
from .product_core import scrape_product_detail # re-export

View File

@@ -0,0 +1,20 @@
from __future__ import annotations
from typing import Callable, Dict, List, Union
Extractor = Callable[[object, str], Dict[str, Union[str, float, int, list, dict, None]]]
REGISTRY: List[Extractor] = []
def extractor(fn: Extractor) -> Extractor:
"""Decorator to register an extractor."""
REGISTRY.append(fn)
return fn
def merge_missing(dst: dict, src: dict) -> None:
"""
Merge src into dst. Only write keys that are missing or empty in dst.
"Empty" means None, "", [], {}.
"""
for k, v in (src or {}).items():
if k not in dst or dst[k] in (None, "", [], {}):
dst[k] = v
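The first-wins merge means registration order is priority order: a later extractor only fills fields that are still missing or empty. A standalone illustration (the sample data is hypothetical):

```python
def merge_missing(dst: dict, src: dict) -> None:
    # Copy of merge_missing above: only fill keys that are absent or empty.
    for k, v in (src or {}).items():
        if k not in dst or dst[k] in (None, "", [], {}):
            dst[k] = v

data = {"title": "Baked Beans", "images": []}
merge_missing(data, {"title": "og:title fallback", "images": ["a.jpg"], "brand": "Suma"})
print(data)  # {'title': 'Baked Beans', 'images': ['a.jpg'], 'brand': 'Suma'}
```

The existing non-empty `title` survives, the empty `images` list is filled, and the new `brand` key is added.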

View File

@@ -0,0 +1,7 @@
{% import "macros/links.html" as links %}
{% if g.rights.admin %}
{% from 'macros/admin_nav.html' import admin_nav_item %}
{{admin_nav_item(
url_for('market.browse.product.admin', slug=slug)
)}}
{% endif %}

View File

@@ -0,0 +1,5 @@
<div class="grid grid-cols-1 sm:grid-cols-3 md:grid-cols-6 gap-3">
{% include "_types/browse/_product_cards.html" %}
</div>
<div class="pb-8"></div>

View File

@@ -0,0 +1,37 @@
{% extends 'oob_elements.html' %}
{# OOB elements for HTMX navigation - all elements that need updating #}
{# Import shared OOB macros #}
{% from '_types/root/header/_oob.html' import root_header_start, root_header_end with context %}
{% from '_types/root/_oob_menu.html' import mobile_menu with context %}
{# Header with app title - includes cart-mini, navigation, and market-specific header #}
{% block oobs %}
{% from '_types/root/_n/macros.html' import oob_header with context %}
{{oob_header('root-header-child', 'market-header-child', '_types/market/header/_header.html')}}
{% from '_types/root/header/_header.html' import header_row with context %}
{{ header_row(oob=True) }}
{% endblock %}
{% block mobile_menu %}
{% include '_types/market/mobile/_nav_panel.html' %}
{% endblock %}
{# Filter container with child summary - from browse/index.html child_summary block #}
{% block filter %}
{% include "_types/browse/mobile/_filter/summary.html" %}
{% endblock %}
{% block aside %}
{% include "_types/browse/desktop/menu.html" %}
{% endblock %}
{% block content %}
{% include "_types/browse/_main_panel.html" %}
{% endblock %}

View File

@@ -0,0 +1,104 @@
{% import 'macros/stickers.html' as stick %}
{% import '_types/product/prices.html' as prices %}
{% set prices_ns = namespace() %}
{{ prices.set_prices(p, prices_ns) }}
{% set item_href = url_for('market.browse.product.product_detail', slug=p.slug)|host %}
<div class="flex flex-col rounded-xl bg-white shadow hover:shadow-md transition overflow-hidden relative">
{# ❤️ like button overlay - OUTSIDE the link #}
{% if g.user %}
<div class="absolute top-2 right-2 z-10 text-6xl md:text-xl">
{% set slug = p.slug %}
{% set liked = p.is_liked or False %}
{% include "_types/browse/like/button.html" %}
</div>
{% endif %}
<a
href="{{ item_href }}"
hx-get="{{ item_href }}"
hx-target="#main-panel"
hx-select="{{hx_select_search}}"
hx-swap="outerHTML"
hx-push-url="true"
class=""
>
{# Make this relative so we can absolutely position children #}
<div class="w-full aspect-square bg-stone-100 relative">
{% if p.image %}
<figure class="inline-block w-full h-full">
<div class="relative w-full h-full">
<img
src="{{ p.image }}"
alt="{{ p.title }}"
class="absolute inset-0 w-full h-full object-contain object-top"
loading="lazy" decoding="async" fetchpriority="low"
/>
{% for l in p.labels %}
<img
src="{{ asset_url('labels/' + l + '.svg') }}"
alt=""
class="pointer-events-none absolute inset-0 w-full h-full object-contain object-top"
/>
{% endfor %}
</div>
<figcaption class="
mt-2 text-sm text-center
{{ 'bg-yellow-200' if p.brand in selected_brands else '' }}
text-stone-600
">
{{ p.brand }}
</figcaption>
</figure>
{% else %}
<div class="p-2 flex flex-col items-center justify-center gap-2 text-red-500 h-full relative">
<div class="text-stone-400 text-xs">No image</div>
<ul class="flex flex-row gap-1">
{% for l in p.labels %}
<li>{{ l }}</li>
{% endfor %}
</ul>
<div class="text-stone-900 text-center line-clamp-3 break-words [overflow-wrap:anywhere]">
{{ p.brand }}
</div>
</div>
{% endif %}
</div>
{# <div>{{ prices.rrp(prices_ns) }}</div> #}
{{ prices.card_price(p)}}
{% import '_types/product/_cart.html' as _cart %}
</a>
<div class="flex justify-center">
{{ _cart.add(p.slug, cart)}}
</div>
<a
href="{{ item_href }}"
hx-get="{{ item_href }}"
hx-target="#main-panel"
hx-select="{{hx_select_search}}"
hx-swap="outerHTML"
hx-push-url="true"
>
<div class="flex flex-row justify-center gap-2 p-2">
{% for s in p.stickers %}
{{ stick.sticker(
asset_url('stickers/' + s + '.svg'),
s,
True,
size=24,
found=s in selected_stickers
) }}
{% endfor %}
</div>
<div class="text-sm font-medium text-stone-800 text-center line-clamp-3 break-words [overflow-wrap:anywhere]">
{{ p.title | highlight(search) }}
</div>
</a>
</div>

View File

@@ -0,0 +1,107 @@
{% for p in products %}
{% include "_types/browse/_product_card.html" %}
{% endfor %}
{% if page < total_pages|int %}
<div
id="sentinel-{{ page }}-m"
class="block md:hidden h-[60vh] opacity-0 pointer-events-none js-mobile-sentinel"
hx-get="{{ (current_local_href ~ {'page': page + 1}|qs)|host }}"
hx-trigger="intersect once delay:250ms, sentinelmobile:retry"
hx-swap="outerHTML"
_="
init
if not me.dataset.retryMs then set me.dataset.retryMs to 1000 end
if window.matchMedia('(min-width: 768px)').matches then set @hx-disabled to '' end
on resize from window
if window.matchMedia('(min-width: 768px)').matches then set @hx-disabled to '' else remove @hx-disabled end
on htmx:beforeRequest
if window.matchMedia('(min-width: 768px)').matches then halt end
add .hidden to .js-neterr in me
remove .hidden from .js-loading in me
remove .opacity-100 from me
add .opacity-0 to me
def backoff()
set ms to me.dataset.retryMs
if ms > 30000 then set ms to 30000 end
-- show big SVG panel & make sentinel visible
add .hidden to .js-loading in me
remove .hidden from .js-neterr in me
remove .opacity-0 from me
add .opacity-100 to me
wait ms ms
trigger sentinelmobile:retry
set ms to ms * 2
if ms > 30000 then set ms to 30000 end
set me.dataset.retryMs to ms
end
on htmx:sendError call backoff()
on htmx:responseError call backoff()
on htmx:timeout call backoff()
"
role="status"
aria-live="polite"
aria-hidden="true"
>
{% include "sentinel/mobile_content.html" %}
</div>
<!-- DESKTOP sentinel (custom scroll container) -->
<div
id="sentinel-{{ page }}-d"
class="hidden md:block h-4 opacity-0 pointer-events-none"
hx-get="{{ (current_local_href ~ {'page': page + 1}|qs)|host}}"
hx-trigger="intersect once delay:250ms, sentinel:retry"
hx-swap="outerHTML"
_="
init
if not me.dataset.retryMs then set me.dataset.retryMs to 1000 end
on htmx:beforeRequest(event)
add .hidden to .js-neterr in me
remove .hidden from .js-loading in me
remove .opacity-100 from me
add .opacity-0 to me
set trig to null
if event.detail and event.detail.triggeringEvent then
set trig to event.detail.triggeringEvent
end
if trig and trig.type is 'intersect'
set scroller to the closest .js-grid-viewport
if scroller is null then halt end
if scroller.scrollTop < 20 then halt end
end
def backoff()
set ms to me.dataset.retryMs
if ms > 30000 then set ms to 30000 end
add .hidden to .js-loading in me
remove .hidden from .js-neterr in me
remove .opacity-0 from me
add .opacity-100 to me
wait ms ms
trigger sentinel:retry
set ms to ms * 2
if ms > 30000 then set ms to 30000 end
set me.dataset.retryMs to ms
end
on htmx:sendError call backoff()
on htmx:responseError call backoff()
on htmx:timeout call backoff()
"
role="status"
aria-live="polite"
aria-hidden="true"
>
{% include "sentinel/desktop_content.html" %}
</div>
{% else %}
<div class="col-span-full mt-4 text-center text-xs text-stone-400">End of results</div>
{% endif %}

View File

@@ -0,0 +1,40 @@
{# Categories #}
<nav aria-label="Categories"
class="rounded-xl border bg-white shadow-sm min-h-0">
<ul class="divide-y">
{% set top_active = (current_local_href == top_local_href) %}
{% set href = (url_for('market.browse.browse_top', top_slug=top_slug) ~ qs)|host %}
<li>
<a
href="{{ href }}"
hx-get="{{ href }}"
hx-target="#main-panel"
hx-select="{{hx_select_search}}"
hx-swap="outerHTML"
hx-push-url="true"
aria-selected="{{ 'true' if top_active else 'false' }}"
class="block px-4 py-3 text-[15px] transition {{select_colours}}">
<div class="prose prose-stone max-w-none">All products</div>
</a>
</li>
{% for sub in subs_local %}
{% set active = (current_local_href == sub.local_href) %}
{% set href = (url_for('market.browse.browse_sub', top_slug=top_slug, sub_slug=sub.slug) ~ qs)|host %}
<li>
<a
href="{{ href }}"
hx-get="{{ href }}"
hx-target="#main-panel"
hx-select="{{hx_select_search}}"
hx-swap="outerHTML"
hx-push-url="true"
aria-selected="{{ 'true' if active else 'false' }}"
class="block px-4 py-3 text-[15px] border-l-4 transition {{select_colours}}"
>
<div class="prose prose-stone max-w-none">{{ (sub.html_label or sub.name) | safe }}</div>
</a>
</li>
{% endfor %}
</ul>
</nav>

View File

@@ -0,0 +1,40 @@
{# Brand filter (desktop) #}
<nav aria-label="Brands"
class="rounded-xl border bg-white shadow-sm">
<h2 class="text-md mt-2 font-semibold">Brands</h2>
<ul class="divide-y">
{% for b in brands %}
{% set is_selected = (b.name in selected_brands) %}
{% if is_selected %}
{% set brand_href = (current_local_href ~ {"remove_brand": b.name, "page": None}|qs)|host %}
{% else %}
{% set brand_href = (current_local_href ~ {"add_brand": b.name, "page": None}|qs)|host %}
{% endif %}
<li>
<a
href="{{ brand_href }}"
hx-get="{{ brand_href }}"
hx-target="#main-panel"
hx-select="{{hx_select_search}}"
hx-swap="outerHTML"
hx-push-url="true"
hx-on:htmx:afterSwap="this.closest('details')?.removeAttribute('open')"
class="flex items-center gap-2 px-2 py-2 rounded transition {% if is_selected %} bg-stone-900 text-white {% else %} hover:bg-stone-50 {% endif %}">
<span class="inline-flex items-center justify-center w-5 h-5 rounded border {% if is_selected %} border-stone-900 bg-stone-900 text-white {% else %} border-stone-300 {% endif %}">
{% if is_selected %}
<svg viewBox="0 0 24 24" class="w-4 h-4" aria-hidden="true">
<path d="M5 13l4 4L19 7" fill="none" stroke="currentColor" stroke-width="3" stroke-linecap="round" stroke-linejoin="round"/>
</svg>
{% endif %}
</span>
<span class="flex-1 text-sm">{{ b.name }}</span>
{% if b.count is not none %}
<span class="{% if b.count==0 %}text-lg text-red-500{% else %}text-sm{% endif %} {% if is_selected %}opacity-90{% else %}text-stone-500{% endif %}">{{ b.count }}</span>
{% endif %}
</a>
</li>
{% endfor %}
</ul>
</nav>

View File

@@ -0,0 +1,44 @@
{% import 'macros/stickers.html' as stick %}
<ul
id="labels-details-desktop"
class="flex justify-center p-0 m-0 gap-2"
>
{% for s in labels %}
{% set is_on = (selected_labels and (s.name|lower in selected_labels)) %}
{% set qs = {"remove_label": s.name, "page":None}|qs if is_on
else {"add_label": s.name, "page":None}|qs %}
{% set href = (current_local_href ~ qs)|host %}
<li>
<a
href="{{ href }}"
hx-get="{{ href }}"
hx-target="#main-panel"
hx-select="{{hx_select_search}}"
hx-swap="outerHTML"
hx-push-url="true"
role="button"
aria-pressed="{{ 'true' if is_on else 'false' }}"
title="{{ s.name }}" aria-label="{{ s.name }}"
class="flex w-full h-full flex-col items-center justify-center py-2"
>
<!-- col 1: icon -->
{{ stick.sticker(asset_url('nav-labels/' + s.name + '.svg'), s.name, is_on)}}
<!-- col 3: count (right aligned) -->
{% if s.count is not none %}
<span class="
{{'text-xs text-stone-500' if s.count != 0 else 'text-md text-red-500 font-bold'}}
leading-none justify-self-end tabular-nums">
{{ s.count }}
</span>
{% endif %}
</a>
</li>
{% endfor %}
</ul>

View File

@@ -0,0 +1,38 @@
{% import 'macros/stickers.html' as stick %}
{% set qs = {"liked": None if liked else True, "page": None}|qs %}
{% set href = (current_local_href ~ qs)|host %}
<a
href="{{ href }}"
hx-get="{{ href }}"
hx-target="#main-panel"
hx-select="{{hx_select_search}}"
hx-swap="outerHTML"
hx-push-url="true"
role="button"
aria-pressed="{{ 'true' if liked else 'false' }}"
title="liked"
{% if liked %}
aria-label="show liked and unliked"
{% else %}
aria-label="show just liked"
{% endif %}
>
{% if liked %}
<i aria-hidden="true"
class="fa-solid fa-heart text-red-500 text-[40px] leading-none"
></i>
{% else %}
<i aria-hidden="true"
class="fa-solid fa-heart text-stone-300 text-[40px] leading-none"
></i>
{% endif %}
<span class="
{{'text-[10px] text-stone-500' if liked_count != 0 else 'text-md text-red-500 font-bold'}}
mt-1 leading-none tabular-nums
"
aria-label="liked count"
>
{{ liked_count }}
</span>
</a>

Some files were not shown because too many files have changed in this diff.