# market/scrape/product/extractors/title.py


from __future__ import annotations
from typing import Dict
from bs4 import BeautifulSoup
from utils import normalize_text
from ..registry import extractor

@extractor
def ex_title(soup: BeautifulSoup, url: str) -> Dict:
    """Extract the product title from a parsed page.

    CSS selectors are tried in priority order and the first one that yields
    a non-empty value wins. Heading elements contribute their text content;
    the Open Graph ``<meta>`` fallback contributes its ``content`` attribute.
    Both kinds of value are passed through ``normalize_text`` so the result
    is cleaned up the same way regardless of which selector matched.

    Args:
        soup: Parsed HTML document to search.
        url: Not used by this extractor; kept in the signature
            (presumably the shape shared by registered extractors -- confirm).

    Returns:
        A dict with the single key ``"title"``. The value is ``"Product"``
        when no selector produced a non-empty title.
    """
    selectors = (
        "h1.page-title span",
        "h1.page-title",
        "h1.product-name",
        "meta[property='og:title']",
    )
    for sel in selectors:
        el = soup.select_one(sel)
        if el is None:
            continue
        if el.name == "meta":
            # A <meta> tag carries no text; its value is the `content`
            # attribute, which may be absent. Only normalize when present so
            # normalize_text is never handed None.
            raw = el.get("content")
            title = normalize_text(raw) if raw else None
        else:
            title = normalize_text(el.get_text())
        # An element that matched but is empty (or blank after
        # normalization) does not count; move on to the next selector.
        if title:
            return {"title": title}
    return {"title": "Product"}