from __future__ import annotations

from typing import Dict, List

from bs4 import BeautifulSoup

from utils import normalize_text
from ..registry import extractor


@extractor
def ex_labels(soup: BeautifulSoup, url: str) -> Dict:
    """Extract product labels from the ``ul.cdz-product-labels`` list.

    Every ``li.label-item`` inside that list contributes two kinds of
    values: its CSS class names (except the generic ``label-item`` marker)
    and its text content after it has been passed through
    ``normalize_text``.

    Args:
        soup: Parsed document to search.
        url: Not used by this extractor.

    Returns:
        ``{"labels": [...]}`` holding lower-cased, stripped, de-duplicated
        strings, with all class hints first (in document order) followed by
        the texts. An empty dict is returned when the list element is
        missing or nothing was collected from it.
    """
    container = soup.select_one("ul.cdz-product-labels")
    if not container:
        return {}

    class_hints: List[str] = []
    label_texts: List[str] = []
    for entry in container.select("li.label-item"):
        for raw_name in entry.get("class") or []:
            name = (raw_name or "").strip()
            # The structural marker class carries no label information.
            if not name or name.lower() == "label-item":
                continue
            if name not in class_hints:
                class_hints.append(name)

        visible = normalize_text(entry.get_text())
        if visible and visible not in label_texts:
            label_texts.append(visible)

    if not (class_hints or label_texts):
        return {}

    # Merge both sources case-insensitively; the first occurrence wins, so
    # class hints take precedence over identical visible texts.
    labels = []
    emitted = set()
    for candidate in [*class_hints, *(text.lower() for text in label_texts)]:
        normalized = (candidate or "").strip().lower()
        if not normalized or normalized in emitted:
            continue
        emitted.add(normalized)
        labels.append(normalized)
    return {"labels": labels}