Phase 1-3 of decoupling:
- path_setup.py adds the project root to sys.path
- Market-owned models in market/models/ (market, market_place)
- All imports updated: shared.infrastructure, shared.db, shared.browser, etc.
- MarketPlace uses container_type/container_id instead of a post_id FK

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
42 lines
1.2 KiB
Python
42 lines
1.2 KiB
Python
|
|
from __future__ import annotations
|
|
from typing import Dict, List
|
|
from bs4 import BeautifulSoup
|
|
from shared.utils import normalize_text
|
|
from ..registry import extractor
|
|
|
|
@extractor
def ex_labels(soup: BeautifulSoup, url: str) -> Dict:
    """Extract product label badges from a ``ul.cdz-product-labels`` list.

    Expected markup::

        <ul class="cdz-product-labels">
          <li class="label-item new"><div class="label-content">NEW</div></li>
        </ul>

    For every ``li.label-item`` under the list, two kinds of hints are
    gathered: the element's CSS classes (excluding ``label-item`` itself)
    and its text content passed through ``normalize_text``.

    Args:
        soup: Parsed document to search.
        url: Not used by this extractor.

    Returns:
        ``{"labels": [...]}`` where the list holds the lower-cased,
        de-duplicated hints -- all class hints first, then the visible
        texts, each group in document order. An empty dict is returned
        when the list element is missing or yields no hints at all.
    """
    container = soup.select_one("ul.cdz-product-labels")
    if not container:
        return {}

    class_hints: List[str] = []
    visible: List[str] = []
    for entry in container.select("li.label-item"):
        for raw in entry.get("class") or []:
            name = (raw or "").strip()
            # Skip blanks, the structural marker class, and repeats.
            if not name or name.lower() == "label-item" or name in class_hints:
                continue
            class_hints.append(name)

        text = normalize_text(entry.get_text())
        if text and text not in visible:
            visible.append(text)

    if not (class_hints or visible):
        return {}

    # A dict keeps insertion order, so it serves as an ordered set here:
    # the first occurrence of each lower-cased label fixes its position.
    ordered: Dict[str, None] = {}
    for candidate in [*class_hints, *(t.lower() for t in visible)]:
        label = (candidate or "").strip().lower()
        if label:
            ordered.setdefault(label, None)
    return {"labels": list(ordered)}
|