feat: initialize market app with browsing, product, and scraping code
Some checks failed
Build and Deploy / build-and-deploy (push) Has been cancelled
Some checks failed
Build and Deploy / build-and-deploy (push) Has been cancelled
Split from the coop monolith. Includes:
- Market/browse/product blueprints
- Product sync API
- Suma scraping pipeline
- Templates for market, browse, and product views
- Dockerfile and CI workflow for independent deployment
This commit is contained in:
7
scrape/persist_snapshot/__init__.py
Normal file
7
scrape/persist_snapshot/__init__.py
Normal file
@@ -0,0 +1,7 @@
|
||||
from .log_product_result import log_product_result
|
||||
from .upsert_product import upsert_product
|
||||
from .save_nav import save_nav
|
||||
from .capture_listing import capture_listing
|
||||
from .save_link_reports import save_link_reports
|
||||
from .save_subcategory_redirects import save_subcategory_redirects
|
||||
|
||||
3
scrape/persist_snapshot/_get.py
Normal file
3
scrape/persist_snapshot/_get.py
Normal file
@@ -0,0 +1,3 @@
|
||||
def _get(d, key, default=None):
|
||||
v = d.get(key)
|
||||
return default if v in (None, "", [], {}) else v
|
||||
137
scrape/persist_snapshot/capture_listing.py
Normal file
137
scrape/persist_snapshot/capture_listing.py
Normal file
@@ -0,0 +1,137 @@
|
||||
# Imports for this module (split out of the former persist_snapshot.py):
|
||||
from typing import Optional, List
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
|
||||
from typing import List, Optional, Tuple
|
||||
from sqlalchemy.dialects.postgresql import insert as pg_insert
|
||||
from datetime import datetime
|
||||
from sqlalchemy import (
|
||||
select, update
|
||||
)
|
||||
from urllib.parse import urlparse
|
||||
import re
|
||||
|
||||
from models.market import (
|
||||
NavTop,
|
||||
NavSub,
|
||||
Listing,
|
||||
ListingItem,
|
||||
)
|
||||
from db.session import get_session
|
||||
|
||||
# --- Models are unchanged, see original code ---
|
||||
|
||||
# ---------------------- Helper fns called from scraper ------------------------
|
||||
|
||||
|
||||
|
||||
async def capture_listing(url: str, items: List[str], total_pages: int) -> None:
    """Persist one scraped listing snapshot and commit.

    Opens its own DB session; the actual sync work lives in _capture_listing.
    """
    async with get_session() as session:
        await _capture_listing(session, url, items, total_pages)
        await session.commit()
|
||||
|
||||
|
||||
async def _capture_listing(session, url: str, items: List[str], total_pages: int) -> None:
    """Resolve *url* to nav ids and sync the listing rows on *session* (no commit)."""
    top_id, sub_id = await _nav_ids_from_list_url(session, url)
    await _save_listing(session, top_id, sub_id, items, total_pages)
|
||||
|
||||
async def _save_listing(session: AsyncSession, top_id: int, sub_id: Optional[int],
                        items: List[str], total_pages: Optional[int]) -> None:
    """Sync the ListingItem rows for one (top, sub) listing to match *items*.

    - Creates the Listing row on first sight, otherwise refreshes total_pages.
    - Soft-deletes slugs that disappeared, inserts slugs that are new.
    - Does NOT commit; the caller owns the transaction.
    """
    res = await session.execute(
        select(Listing).where(Listing.top_id == top_id, Listing.sub_id == sub_id, Listing.deleted_at.is_(None))
    )
    listing = res.scalar_one_or_none()
    if not listing:
        listing = Listing(top_id=top_id, sub_id=sub_id, total_pages=total_pages)
        session.add(listing)
        # flush so listing.id is available for the child rows below
        await session.flush()
    else:
        listing.total_pages = total_pages

    # Deduplicate incoming slugs, dropping empty / non-string entries.
    # (A parallel ordered list used to be built here too, but its order was
    # never consumed -- the set is all the diffing below needs.)
    incoming: set[str] = {s for s in (items or []) if s and isinstance(s, str)}

    if not incoming:
        return

    # Fetch the slugs currently live in the database for this listing.
    res = await session.execute(
        select(ListingItem.slug)
        .where(ListingItem.listing_id == listing.id, ListingItem.deleted_at.is_(None))
    )
    existing_slugs = set(res.scalars().all())

    now = datetime.utcnow()

    # Slugs present in the DB but absent from the new snapshot: soft-delete.
    to_delete = existing_slugs - incoming
    if to_delete:
        await session.execute(
            update(ListingItem)
            .where(
                ListingItem.listing_id == listing.id,
                ListingItem.slug.in_(to_delete),
                ListingItem.deleted_at.is_(None)
            )
            .values(deleted_at=now)
        )

    # Slugs new for this listing: insert.
    to_insert = incoming - existing_slugs
    if to_insert:
        stmt = pg_insert(ListingItem).values(
            [{"listing_id": listing.id, "slug": s} for s in to_insert]
        )
        # NOTE(review): soft-deleted rows with the same (listing_id, slug) may
        # still exist; if uq_listing_items_listing_slug also covers those, this
        # insert can raise -- consider re-enabling the conflict clause. TODO confirm.
        #.on_conflict_do_nothing(
        #    constraint="uq_listing_items_listing_slug"
        #)
        await session.execute(stmt)
|
||||
|
||||
async def _nav_ids_from_list_url(session: AsyncSession, list_url: str) -> Tuple[int, Optional[int]]:
    """Map a listing URL's path to (top_id, sub_id) via the nav tables.

    The first path segment is the top-level category slug; the optional second
    segment (minus any .html/.htm extension) is the subcategory slug.

    Raises ValueError (from _get_nav_ids) when either slug has no live row.
    """
    parts = [x for x in (urlparse(list_url).path or "").split("/") if x]
    top_slug = parts[0].lower() if parts else ""
    sub_slug = None
    if len(parts) >= 2:
        sub_slug = parts[1]
        if sub_slug.lower().endswith((".html", ".htm")):
            # BUG FIX: the old pattern r"\\.(html?|HTML?)$" required a literal
            # backslash before the dot, so the extension was never actually
            # stripped (and mixed-case extensions were missed entirely).
            sub_slug = re.sub(r"\.html?$", "", sub_slug, flags=re.IGNORECASE)
    return await _get_nav_ids(session, top_slug, sub_slug)
|
||||
|
||||
|
||||
|
||||
async def _get_nav_ids(session: AsyncSession, top_slug: str, sub_slug: Optional[str]) -> Tuple[int, Optional[int]]:
    """Look up the (NavTop.id, NavSub.id) pair for the given slugs.

    sub_id is None when sub_slug is falsy. Raises ValueError when a slug has
    no live (non-soft-deleted) row.
    """
    top_row = await session.execute(
        select(NavTop.id).where(NavTop.slug == top_slug, NavTop.deleted_at.is_(None))
    )
    top_id = top_row.scalar_one_or_none()
    if not top_id:
        raise ValueError(f"NavTop not found for slug: {top_slug}")

    if not sub_slug:
        return top_id, None

    sub_row = await session.execute(
        select(NavSub.id).where(NavSub.slug == sub_slug, NavSub.top_id == top_id, NavSub.deleted_at.is_(None))
    )
    sub_id = sub_row.scalar_one_or_none()
    if sub_id is None:
        raise ValueError(f"NavSub not found for slug: {sub_slug} under top_id={top_id}")

    return top_id, sub_id
|
||||
35
scrape/persist_snapshot/log_product_result.py
Normal file
35
scrape/persist_snapshot/log_product_result.py
Normal file
@@ -0,0 +1,35 @@
|
||||
# Imports for this module (split out of the former persist_snapshot.py):
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
|
||||
from typing import Dict
|
||||
from models.market import (
|
||||
ProductLog,
|
||||
)
|
||||
from db.session import get_session
|
||||
|
||||
|
||||
async def log_product_result(ok: bool, payload: Dict) -> None:
    """Record one product-scrape outcome in its own session and commit."""
    async with get_session() as session:
        await _log_product_result(session, ok, payload)
        await session.commit()
|
||||
|
||||
|
||||
async def _log_product_result(session: AsyncSession, ok: bool, payload: Dict) -> None:
    """Add a ProductLog row built from *payload*; missing keys become None.

    Does not commit; the caller owns the transaction.
    """
    fields = (
        "slug", "href_tried", "error_type", "error_message", "http_status",
        "final_url", "transport_error", "title", "has_description_html",
        "has_description_short", "sections_count", "images_count",
        "embedded_images_count", "all_images_count",
    )
    session.add(ProductLog(ok=ok, **{name: payload.get(name) for name in fields}))
|
||||
|
||||
29
scrape/persist_snapshot/save_link_reports.py
Normal file
29
scrape/persist_snapshot/save_link_reports.py
Normal file
@@ -0,0 +1,29 @@
|
||||
# Imports for this module (split out of the former persist_snapshot.py):
|
||||
from typing import List
|
||||
|
||||
from typing import Dict, List
|
||||
|
||||
from models.market import (
|
||||
LinkError,
|
||||
LinkExternal,
|
||||
)
|
||||
from db.session import get_session
|
||||
|
||||
# --- Models are unchanged, see original code ---
|
||||
|
||||
# ---------------------- Helper fns called from scraper ------------------------
|
||||
|
||||
|
||||
|
||||
async def save_link_reports(link_errors: List[Dict], link_externals: List[Dict]) -> None:
    """Persist scraper link reports (broken internal links + external links) and commit."""
    async with get_session() as session:
        session.add_all(
            LinkError(
                product_slug=err.get("product"),
                href=err.get("href"),
                text=err.get("text"),
                top=err.get("top"),
                sub=err.get("sub"),
                target_slug=err.get("target_slug"),
                type=err.get("type"),
            )
            for err in link_errors
        )
        session.add_all(
            LinkExternal(
                product_slug=ext.get("product"),
                href=ext.get("href"),
                text=ext.get("text"),
                host=ext.get("host"),
            )
            for ext in link_externals
        )
        await session.commit()
|
||||
108
scrape/persist_snapshot/save_nav.py
Normal file
108
scrape/persist_snapshot/save_nav.py
Normal file
@@ -0,0 +1,108 @@
|
||||
# Imports for this module (split out of the former persist_snapshot.py):
|
||||
from datetime import datetime
|
||||
from sqlalchemy import (
|
||||
select, tuple_
|
||||
)
|
||||
from typing import Dict
|
||||
|
||||
from models.market import (
|
||||
NavTop,
|
||||
NavSub,
|
||||
)
|
||||
from db.session import get_session
|
||||
|
||||
|
||||
|
||||
|
||||
async def save_nav(nav: Dict) -> None:
    """Sync the navigation tables to *nav* in a fresh session and commit."""
    async with get_session() as session:
        await _save_nav(session, nav)
        await session.commit()
|
||||
|
||||
async def _save_nav(session, nav: Dict) -> None:
    """Mirror the scraped navigation tree into NavTop / NavSub.

    Entries missing from *nav* are soft-deleted; entries present are upserted
    (and resurrected if they had been soft-deleted before). Does not commit.

    Fix: removed leftover debug prints that dumped the entire nav dict to stdout.
    """
    now = datetime.utcnow()

    incoming_top_slugs = set()
    incoming_sub_keys = set()  # (top_slug, sub_slug)

    # First pass: collect every slug present in the scraped nav.
    for label, data in (nav.get("cats") or {}).items():
        top_slug = (data or {}).get("slug")
        if not top_slug:
            continue
        incoming_top_slugs.add(top_slug)

        for s in (data.get("subs") or []):
            sub_slug = s.get("slug")
            if sub_slug:
                incoming_sub_keys.add((top_slug, sub_slug))

    # Soft-delete stale NavSub entries; joining NavTop gives access to top_slug.
    # NOTE(review): when incoming_sub_keys is empty this soft-deletes every
    # live NavSub -- presumably intended for an empty nav, but confirm.
    subs_to_delete = await session.execute(
        select(NavSub)
        .join(NavTop, NavSub.top_id == NavTop.id)
        .where(
            NavSub.deleted_at.is_(None),
            ~tuple_(NavTop.slug, NavSub.slug).in_(incoming_sub_keys)
        )
    )
    for sub in subs_to_delete.scalars():
        sub.deleted_at = now

    # Soft-delete stale NavTop entries.
    tops_to_delete = await session.execute(
        select(NavTop)
        .where(
            NavTop.deleted_at.is_(None),
            ~NavTop.slug.in_(incoming_top_slugs)
        )
    )
    for top in tops_to_delete.scalars():
        top.deleted_at = now

    await session.flush()

    # Second pass: upsert NavTop rows and their NavSub children.
    for label, data in (nav.get("cats") or {}).items():
        top_slug = (data or {}).get("slug")
        if not top_slug:
            continue

        res = await session.execute(
            select(NavTop).where(NavTop.slug == top_slug)
        )
        top = res.scalar_one_or_none()

        if top:
            top.label = label
            top.deleted_at = None  # resurrect if previously soft-deleted
        else:
            top = NavTop(label=label, slug=top_slug)
            session.add(top)

        # flush so top.id is assigned before creating/looking up subs
        await session.flush()

        for s in (data.get("subs") or []):
            sub_slug = s.get("slug")
            if not sub_slug:
                continue
            sub_label = s.get("label")
            sub_href = s.get("href")

            res_sub = await session.execute(
                select(NavSub).where(
                    NavSub.slug == sub_slug,
                    NavSub.top_id == top.id
                )
            )
            sub = res_sub.scalar_one_or_none()
            if sub:
                sub.label = sub_label
                sub.href = sub_href
                sub.deleted_at = None
            else:
                session.add(NavSub(top_id=top.id, label=sub_label, slug=sub_slug, href=sub_href))
|
||||
|
||||
32
scrape/persist_snapshot/save_subcategory_redirects.py
Normal file
32
scrape/persist_snapshot/save_subcategory_redirects.py
Normal file
@@ -0,0 +1,32 @@
|
||||
# Imports for this module (split out of the former persist_snapshot.py):
|
||||
|
||||
from typing import Dict
|
||||
from datetime import datetime
|
||||
from sqlalchemy import (
|
||||
update
|
||||
)
|
||||
from models.market import (
|
||||
SubcategoryRedirect,
|
||||
)
|
||||
from db.session import get_session
|
||||
|
||||
# --- Models are unchanged, see original code ---
|
||||
|
||||
# ---------------------- Helper fns called from scraper ------------------------
|
||||
|
||||
|
||||
async def save_subcategory_redirects(mapping: Dict[str, str]) -> None:
    """Replace the stored subcategory redirect map with *mapping* and commit."""
    async with get_session() as session:
        await _save_subcategory_redirects(session, mapping)
        await session.commit()
|
||||
|
||||
|
||||
async def _save_subcategory_redirects(session, mapping: Dict[str, str]) -> None:
    """Soft-delete every live SubcategoryRedirect row, then insert *mapping* fresh.

    Does not commit; the caller owns the transaction.
    """
    retire_all = (
        update(SubcategoryRedirect)
        .where(SubcategoryRedirect.deleted_at.is_(None))
        .values(deleted_at=datetime.utcnow())
    )
    await session.execute(retire_all)
    session.add_all(
        SubcategoryRedirect(old_path=old, new_path=new)
        for old, new in mapping.items()
    )
|
||||
237
scrape/persist_snapshot/upsert_product.py
Normal file
237
scrape/persist_snapshot/upsert_product.py
Normal file
@@ -0,0 +1,237 @@
|
||||
# Imports for this module (split out of the former persist_snapshot.py):
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
|
||||
from typing import Dict
|
||||
from datetime import datetime
|
||||
from sqlalchemy import (
|
||||
func, select, update
|
||||
)
|
||||
|
||||
from models.market import (
|
||||
Product,
|
||||
ProductImage,
|
||||
ProductSection,
|
||||
ProductLabel,
|
||||
ProductSticker,
|
||||
ProductAttribute,
|
||||
ProductNutrition,
|
||||
ProductAllergen
|
||||
)
|
||||
from db.session import get_session
|
||||
|
||||
from ._get import _get
|
||||
from .log_product_result import _log_product_result
|
||||
|
||||
# --- Models are unchanged, see original code ---
|
||||
|
||||
# ---------------------- Helper fns called from scraper ------------------------
|
||||
|
||||
|
||||
|
||||
|
||||
async def _upsert_product(session: AsyncSession, d: Dict) -> Product:
    """Create or update one Product row (and its child tables) from scrape dict *d*.

    Each child collection (sections, images, labels, stickers, attributes,
    nutrition, allergens) is diff-synced against the live rows: tuples that
    are new get inserted, tuples that disappeared get soft-deleted
    (deleted_at stamped). Flushes but does not commit; returns the Product.

    Raises:
        ValueError: when *d* carries no "slug".
    """
    slug = d.get("slug")
    if not slug:
        raise ValueError("product missing slug")
    res = await session.execute(select(Product).where(Product.slug == slug, Product.deleted_at.is_(None)))
    p = res.scalar_one_or_none()
    if not p:
        p = Product(slug=slug)
        session.add(p)
        # FIX: flush right away so p.id is populated before the child-table
        # sync below builds filters and rows against it (previously this leaned
        # on autoflush happening to fire during the first child SELECT).
        await session.flush()

    # Scalar fields -- _get() treats None/""/[]/{} as absent.
    p.title = _get(d, "title")
    p.image = _get(d, "image")
    p.description_short = _get(d, "description_short")
    p.description_html = _get(d, "description_html")
    p.suma_href = _get(d, "suma_href")
    p.brand = _get(d, "brand")
    p.rrp = _get(d, "rrp")
    p.rrp_currency = _get(d, "rrp_currency")
    p.rrp_raw = _get(d, "rrp_raw")
    p.price_per_unit = _get(d, "price_per_unit")
    p.price_per_unit_currency = _get(d, "price_per_unit_currency")
    p.price_per_unit_raw = _get(d, "price_per_unit_raw")
    p.special_price = _get(d, "special_price")
    p.special_price_currency = _get(d, "special_price_currency")
    p.special_price_raw = _get(d, "special_price_raw")
    p.regular_price = _get(d, "regular_price")
    p.regular_price_currency = _get(d, "regular_price_currency")
    p.regular_price_raw = _get(d, "regular_price_raw")
    p.case_size_count = _get(d, "case_size_count")
    p.case_size_item_qty = _get(d, "case_size_item_qty")
    p.case_size_item_unit = _get(d, "case_size_item_unit")
    p.case_size_raw = _get(d, "case_size_raw")
    p.ean = d.get("ean") or d.get("barcode") or None
    p.sku = d.get("sku")
    p.unit_size = d.get("unit_size")
    p.pack_size = d.get("pack_size")
    p.updated_at = func.now()  # DB-side timestamp

    now = datetime.utcnow()  # soft-delete timestamp used throughout

    # ---- ProductSection sync, keyed on (title, html) ----
    existing_sections = await session.execute(select(ProductSection).where(ProductSection.product_id == p.id, ProductSection.deleted_at.is_(None)))
    existing_sections_set = {(s.title, s.html) for s in existing_sections.scalars()}

    new_sections_set = set()
    for sec in d.get("sections") or []:
        if isinstance(sec, dict) and sec.get("title") and sec.get("html"):
            new_sections_set.add((sec["title"], sec["html"]))
            if (sec["title"], sec["html"]) not in existing_sections_set:
                session.add(ProductSection(product_id=p.id, title=sec["title"], html=sec["html"]))

    for s in existing_sections_set - new_sections_set:
        await session.execute(update(ProductSection).where(ProductSection.product_id == p.id, ProductSection.title == s[0], ProductSection.html == s[1], ProductSection.deleted_at.is_(None)).values(deleted_at=now))

    # ---- ProductImage sync, keyed on (url, kind) ----
    existing_images = await session.execute(select(ProductImage).where(ProductImage.product_id == p.id, ProductImage.deleted_at.is_(None)))
    existing_images_set = {(img.url, img.kind) for img in existing_images.scalars()}

    new_images_set = set()
    for kind, urls in [
        ("gallery", d.get("images") or []),
        ("embedded", d.get("embedded_image_urls") or []),
        ("all", d.get("all_image_urls") or []),
    ]:
        for idx, url in enumerate(urls):
            if url:
                new_images_set.add((url, kind))
                if (url, kind) not in existing_images_set:
                    session.add(ProductImage(product_id=p.id, url=url, position=idx, kind=kind))

    for img in existing_images_set - new_images_set:
        await session.execute(update(ProductImage).where(ProductImage.product_id == p.id, ProductImage.url == img[0], ProductImage.kind == img[1], ProductImage.deleted_at.is_(None)).values(deleted_at=now))

    # ---- ProductLabel sync, keyed on stripped name ----
    existing_labels = await session.execute(select(ProductLabel).where(ProductLabel.product_id == p.id, ProductLabel.deleted_at.is_(None)))
    existing_labels_set = {label.name.strip() for label in existing_labels.scalars()}

    new_labels = {str(name).strip() for name in (d.get("labels") or []) if name}

    for name in new_labels - existing_labels_set:
        session.add(ProductLabel(product_id=p.id, name=name))

    for name in existing_labels_set - new_labels:
        await session.execute(update(ProductLabel).where(ProductLabel.product_id == p.id, ProductLabel.name == name, ProductLabel.deleted_at.is_(None)).values(deleted_at=now))

    # ---- ProductSticker sync, keyed on name ----
    # NOTE(review): incoming stickers are lowercased but existing names are
    # only stripped -- self-consistent once all stored rows came through this
    # path, but mixed-case legacy rows would churn every run. TODO confirm.
    existing_stickers = await session.execute(select(ProductSticker).where(ProductSticker.product_id == p.id, ProductSticker.deleted_at.is_(None)))
    existing_stickers_set = {sticker.name.strip() for sticker in existing_stickers.scalars()}

    new_stickers = {str(name).strip().lower() for name in (d.get("stickers") or []) if name}

    for name in new_stickers - existing_stickers_set:
        session.add(ProductSticker(product_id=p.id, name=name))

    for name in existing_stickers_set - new_stickers:
        await session.execute(update(ProductSticker).where(ProductSticker.product_id == p.id, ProductSticker.name == name, ProductSticker.deleted_at.is_(None)).values(deleted_at=now))

    # ---- ProductAttribute sync, keyed on ("prefix/key", value) ----
    existing_attrs = await session.execute(select(ProductAttribute).where(ProductAttribute.product_id == p.id, ProductAttribute.deleted_at.is_(None)))
    existing_attrs_set = {(a.key, a.value) for a in existing_attrs.scalars()}

    new_attrs_set = set()
    for src, prefix in [(d.get("info_table") or {}, "info_table"), (d.get("oe_list_price") or {}, "oe_list_price")]:
        for k, v in src.items():
            key = f"{prefix}/{str(k).strip()}"
            val = None if v is None else str(v)
            new_attrs_set.add((key, val))
            if (key, val) not in existing_attrs_set:
                session.add(ProductAttribute(product_id=p.id, key=key, value=val))

    for key, val in existing_attrs_set - new_attrs_set:
        await session.execute(update(ProductAttribute).where(ProductAttribute.product_id == p.id, ProductAttribute.key == key, ProductAttribute.value == val, ProductAttribute.deleted_at.is_(None)).values(deleted_at=now))

    # ---- ProductNutrition sync, keyed on (key, value, unit) ----
    existing_nuts = await session.execute(select(ProductNutrition).where(ProductNutrition.product_id == p.id, ProductNutrition.deleted_at.is_(None)))
    existing_nuts_set = {(n.key, n.value, n.unit) for n in existing_nuts.scalars()}

    new_nuts_set = set()
    nutrition = d.get("nutrition") or []
    # Scrapers emit either a flat {key: value} dict (no units) or a list of
    # {"key", "value", "unit"} rows; handle both shapes.
    if isinstance(nutrition, dict):
        for k, v in nutrition.items():
            key, val = str(k).strip(), str(v) if v is not None else None
            new_nuts_set.add((key, val, None))
            if (key, val, None) not in existing_nuts_set:
                session.add(ProductNutrition(product_id=p.id, key=key, value=val, unit=None))
    elif isinstance(nutrition, list):
        for row in nutrition:
            try:
                key = str(row.get("key") or "").strip()
                val = None if row.get("value") is None else str(row.get("value"))
                unit = None if row.get("unit") is None else str(row.get("unit"))
                if key:
                    new_nuts_set.add((key, val, unit))
                    if (key, val, unit) not in existing_nuts_set:
                        session.add(ProductNutrition(product_id=p.id, key=key, value=val, unit=unit))
            except Exception:
                # malformed row (e.g. not a dict) -- skip it, keep the rest
                continue

    for key, val, unit in existing_nuts_set - new_nuts_set:
        await session.execute(update(ProductNutrition).where(ProductNutrition.product_id == p.id, ProductNutrition.key == key, ProductNutrition.value == val, ProductNutrition.unit == unit, ProductNutrition.deleted_at.is_(None)).values(deleted_at=now))

    # ---- ProductAllergen sync, keyed on (name, contains) ----
    existing_allergens = await session.execute(select(ProductAllergen).where(ProductAllergen.product_id == p.id, ProductAllergen.deleted_at.is_(None)))
    existing_allergens_set = {(a.name, a.contains) for a in existing_allergens.scalars()}

    new_allergens_set = set()
    for a in d.get("allergens") or []:
        # Accept either a bare name (implies contains=True) or a full dict.
        if isinstance(a, str):
            nm, contains = a.strip(), True
        elif isinstance(a, dict):
            nm, contains = (a.get("name") or "").strip(), bool(a.get("contains", True))
        else:
            continue
        if nm:
            new_allergens_set.add((nm, contains))
            if (nm, contains) not in existing_allergens_set:
                session.add(ProductAllergen(product_id=p.id, name=nm, contains=contains))

    for name, contains in existing_allergens_set - new_allergens_set:
        await session.execute(update(ProductAllergen).where(ProductAllergen.product_id == p.id, ProductAllergen.name == name, ProductAllergen.contains == contains, ProductAllergen.deleted_at.is_(None)).values(deleted_at=now))

    await session.flush()
    return p
|
||||
|
||||
async def upsert_product(
    slug,
    href,
    d,
):
    """Upsert the scraped product *d* and record the outcome in ProductLog.

    On success the upsert and a success log row are committed together.
    On failure the error is printed with a traceback, a failure log row is
    added, and the exception re-raised.

    NOTE(review): on the failure path session.commit() is skipped before the
    re-raise, so whether the failure log row survives depends on
    get_session's teardown behavior -- TODO confirm.
    """
    async with get_session() as session:
        try:
            await _upsert_product(session, d)
            await _log_product_result(session, ok=True, payload={
                "slug": slug,
                "href_tried": href,
                "title": d.get("title"),
                "has_description_html": bool(d.get("description_html")),
                "has_description_short": bool(d.get("description_short")),
                "sections_count": len(d.get("sections") or []),
                # BUG FIX: these three counts used len(d.get(...)) with no
                # fallback, so a missing key raised TypeError(len(None)) and
                # mislabelled a successful upsert as a failure.
                "images_count": len(d.get("images") or []),
                "embedded_images_count": len(d.get("embedded_image_urls") or []),
                "all_images_count": len(d.get("all_image_urls") or []),
            })

        except Exception as e:
            print(f"[ERROR] Failed to upsert product '{d.get('slug')}'")
            # BUG FIX: was f" Title: {d}.get('title')" -- printed the whole
            # dict followed by the literal text ".get('title')".
            print(f" Title: {d.get('title')}")
            print(f" URL: {d.get('suma_href')}")
            print(f" Error type: {type(e).__name__}")
            print(f" Error message: {str(e)}")
            import traceback
            traceback.print_exc()
            await _log_product_result(session, ok=False, payload={
                "slug": d.get("slug"),
                "href_tried": d.get("suma_href"),
                "error_type": type(e).__name__,
                "error_message": str(e),
                "title": d.get("title"),
            })
            raise
        await session.commit()
|
||||
Reference in New Issue
Block a user