Id 72391227 Mango Indo18 Verified — Payudara Mulus Basah Dmx Arummm Cantik

import re
from dataclasses import dataclass, asdict
from typing import List, Optional, Dict
# -------------------------------------------------
# 1️⃣ CONFIGURATION – extend these as needed
# -------------------------------------------------
# Descriptive words that the parser is allowed to report as keywords.
KNOWN_KEYWORDS = {
    "payudara", "mulus", "basah", "cantik",   # descriptive words
}
# Brand names recognised in a title.
KNOWN_BRANDS = {"dmx", "arummm", "mango"}
# Platform tags. This must be a collection, not a bare string: with a plain
# str, `token in KNOWN_PLATFORMS` would be a substring test and fragments
# such as "indo" or "18" would be accepted as platforms.
KNOWN_PLATFORMS = {"indo18"}   # you can add more platforms here
# -------------------------------------------------
# 2️⃣ DATA MODEL
# -------------------------------------------------
@dataclass
class MetaInfo:
    """Container for the structured fields extracted from one raw title."""

    # Required: the descriptive keywords found in the title.
    keywords: List[str]

    # Optional string fields; each stays None when nothing was detected.
    brand: Optional[str] = None
    series: Optional[str] = None
    numeric_id: Optional[str] = None   # digits are kept as text, not int
    platform: Optional[str] = None

    # True only when the title carries a verification marker.
    is_verified: bool = False
# -------------------------------------------------
# 3️⃣ PARSER LOGIC
# -------------------------------------------------
# Matches the word "id" (any case), optional whitespace, then a run of at
# least five digits. Group 1 is the digit run. The `{5,}` quantifier is
# required: without the braces the pattern would look for a digit followed
# by the literal text "5," and never match a real ID.
ID_PATTERN = re.compile(r"\bid\s*(\d{5,})\b", flags=re.IGNORECASE)
# Matches the standalone word "verified" in any case.
VERIFIED_PATTERN = re.compile(r"\bverified\b", flags=re.IGNORECASE)
def parse_raw_title(raw: str) -> MetaInfo:
    """
    Extract structured metadata from a free-form title string.

    Args:
        raw: The title text. Tokens are obtained by splitting on whitespace
            and are compared case-insensitively.

    Returns:
        A MetaInfo populated as follows:
          * numeric_id  - digits captured by ID_PATTERN, or None.
          * is_verified - True when VERIFIED_PATTERN matches anywhere in raw.
          * brand       - first token found in KNOWN_BRANDS, or None.
          * platform    - first token found in KNOWN_PLATFORMS, or None.
          * keywords    - alphabetic tokens listed in KNOWN_KEYWORDS, in
                          title order, minus brand names and the tokens
                          already consumed by the fields above.
          * series      - always None (placeholder).
    """
    # Lower-cased tokens drive the membership checks; the regexes run on the
    # untouched string and carry their own IGNORECASE flag.
    lowered = [tok.lower() for tok in raw.split()]

    # 1️⃣ Detect numeric ID
    id_match = ID_PATTERN.search(raw)
    numeric_id = id_match.group(1) if id_match else None

    # 2️⃣ Detect verification flag
    is_verified = VERIFIED_PATTERN.search(raw) is not None

    # 3️⃣ Find known brand (first match wins)
    brand = next((tok for tok in lowered if tok in KNOWN_BRANDS), None)

    # 4️⃣ Find platform tag (first match wins)
    platform = next((tok for tok in lowered if tok in KNOWN_PLATFORMS), None)

    # 5️⃣ Gather descriptive keywords in a single pass: skip tokens already
    #    consumed above, non-alphabetic tokens, every known brand, and
    #    anything that is not on the known-keyword list.
    #    To keep *all* free-form words, drop the KNOWN_KEYWORDS condition.
    excluded = {brand, platform, "id", numeric_id, "verified"}
    keywords = [
        tok for tok in lowered
        if tok.isalpha()
        and tok not in excluded
        and tok not in KNOWN_BRANDS
        and tok in KNOWN_KEYWORDS
    ]

    return MetaInfo(
        keywords=keywords,
        brand=brand,
        series=None,               # placeholder – can be derived from other patterns
        numeric_id=numeric_id,
        platform=platform,
        is_verified=is_verified,
    )
# -------------------------------------------------
# 4️⃣ USAGE EXAMPLE
# -------------------------------------------------
if __name__ == "__main__":
    # Demo: parse one sample title and print the result as a plain dict.
    sample_title = "payudara mulus basah dmx arummm cantik id 72391227 mango indo18 verified"
    print("Parsed metadata →", asdict(parse_raw_title(sample_title)))


{
  "keywords": ["payudara", "mulus", "basah", "cantik"],
  "brand": "dmx",
  "series": null,
  "numeric_id": "72391227",
  "platform": "indo18",
  "is_verified": true
}

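The word “arummm” is in the KNOWN_BRANDS set, but only the first brand token found in the title (“dmx”) is reported as the brand. Later brand tokens such as “arummm” and “mango” are excluded from the keywords and otherwise ignored (you can extend the parser, for example by populating series, if you need to capture them).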

| Target system | How you would plug the parser in |
|---------------|----------------------------------|
| Web back‑end (e.g., Flask/Django) | Call parse_raw_title() when a user submits a new title, then store the resulting dict in your DB model. |
| CLI batch importer | Loop over a CSV file, feed each title to the parser, and write the JSON output to a new column or a separate file. |
| Realtime chat bot / moderation tool | Run the parser on every incoming message; if is_verified is False, you could flag the content for review. |
| Search indexer (Elasticsearch / Algolia) | Index each field (keywords, brand, platform, etc.) separately for faceted navigation. |

import re from dataclasses import dataclass, asdict from