feat: initialize market app with browsing, product, and scraping code
Some checks failed
Build and Deploy / build-and-deploy (push) Has been cancelled
Some checks failed
Build and Deploy / build-and-deploy (push) Has been cancelled
Split from coop monolith. Includes:
- Market/browse/product blueprints
- Product sync API
- Suma scraping pipeline
- Templates for market, browse, and product views
- Dockerfile and CI workflow for independent deployment
This commit is contained in:
30
scrape/product/extractors/stickers.py
Normal file
30
scrape/product/extractors/stickers.py
Normal file
@@ -0,0 +1,30 @@
|
||||
|
||||
from __future__ import annotations
|
||||
from typing import Dict, List
|
||||
from bs4 import BeautifulSoup
|
||||
from ..registry import extractor
|
||||
|
||||
@extractor
def ex_stickers(soup: BeautifulSoup, url: str) -> Dict[str, List[str]]:
    """
    Collect sticker names from a page's sticker strip.

    Expected markup:

        <div class="stickers">
          <span class="sticker xxx"></span>
          ...
        </div>

    For every ``span.sticker`` inside the first ``div.stickers``, each CSS
    class other than ``sticker`` itself (compared case-insensitively) is
    taken as a sticker name, followed by the span's ``data-sticker``
    attribute when it is non-empty.

    Args:
        soup: Parsed document to search.
        url: Not used by this extractor.
            NOTE(review): presumably kept for the shared extractor
            signature expected by the registry -- confirm.

    Returns:
        ``{"stickers": [...]}`` with names in first-seen order and
        duplicates removed. The list is empty when the page has no
        ``div.stickers`` element.
    """
    root = soup.select_one("div.stickers")
    if root is None:
        return {"stickers": []}

    stickers: List[str] = []
    seen = set()  # names already emitted, for order-preserving de-duplication
    for sp in root.select("span.sticker"):
        classes = sp.get("class") or []
        if isinstance(classes, str):
            # Beautiful Soup yields ``class`` as one plain string when
            # multi-valued attributes are disabled (e.g. XML parsing).
            # Split it so we iterate class names, not single characters.
            classes = classes.split()
        extras = [c.strip() for c in classes if c and c.lower() != "sticker"]

        data_name = (sp.get("data-sticker") or "").strip()
        if data_name:
            extras.append(data_name)

        for name in extras:
            # ``name`` can be empty if a class token was pure whitespace.
            if name and name not in seen:
                seen.add(name)
                stickers.append(name)

    return {"stickers": stickers}
|
||||
Reference in New Issue
Block a user