# ==================== visual_processing_service.py (FIXED - Dynamic Detection) ====================
import torch
import cv2
import numpy as np
import requests
from io import BytesIO
from PIL import Image
from typing import Dict, List, Optional, Tuple
import logging
from transformers import CLIPProcessor, CLIPModel
from sklearn.cluster import KMeans

logger = logging.getLogger(__name__)
class VisualProcessingService:
    """Service for extracting visual attributes from product images using CLIP."""

    # Class-level caching (shared across instances)
    _clip_model = None
    _clip_processor = None
    _device = None

    # Define category-specific attributes
    CATEGORY_ATTRIBUTES = {
        "clothing": {
            "products": ["t-shirt", "shirt", "dress", "pants", "jeans", "shorts",
                         "skirt", "jacket", "coat", "sweater", "hoodie", "top", "blouse"],
            "attributes": {
                "pattern": ["solid color", "striped", "checkered", "graphic print", "floral", "geometric", "plain"],
                "material": ["cotton", "polyester", "denim", "leather", "silk", "wool", "linen", "blend"],
                "style": ["casual", "formal", "sporty", "streetwear", "elegant", "vintage", "bohemian"],
                "fit": ["slim fit", "regular fit", "loose fit", "oversized", "tailored"],
                "neckline": ["crew neck", "v-neck", "round neck", "collar", "scoop neck"],
                "sleeve_type": ["short sleeve", "long sleeve", "sleeveless", "3/4 sleeve"],
                "closure_type": ["button", "zipper", "pull-on", "snap", "tie"]
            }
        },
        "tools": {
            "products": ["screwdriver", "hammer", "wrench", "pliers", "drill", "saw",
                         "measuring tape", "level", "chisel", "file"],
            "attributes": {
                "material": ["steel", "aluminum", "plastic", "wood", "rubber", "chrome"],
                "type": ["manual", "electric", "pneumatic", "cordless", "corded"],
                "finish": ["chrome plated", "powder coated", "stainless steel", "painted"],
                "handle_type": ["rubber grip", "plastic", "wooden", "cushioned", "ergonomic"]
            }
        },
        "electronics": {
            "products": ["phone", "laptop", "tablet", "headphones", "speaker", "camera",
                         "smartwatch", "charger", "mouse", "keyboard"],
            "attributes": {
                "material": ["plastic", "metal", "glass", "aluminum", "rubber"],
                "style": ["modern", "minimalist", "sleek", "industrial", "vintage"],
                "finish": ["matte", "glossy", "metallic", "textured"],
                "connectivity": ["wireless", "wired", "bluetooth", "USB"]
            }
        },
        "furniture": {
            "products": ["chair", "table", "sofa", "bed", "desk", "shelf", "cabinet",
                         "dresser", "bench", "stool"],
            "attributes": {
                "material": ["wood", "metal", "glass", "plastic", "fabric", "leather"],
                "style": ["modern", "traditional", "industrial", "rustic", "contemporary", "vintage"],
                "finish": ["natural wood", "painted", "stained", "laminated", "upholstered"]
            }
        },
        "home_decor": {
            "products": ["painting", "canvas", "wall art", "frame", "vase", "lamp",
                         "mirror", "clock", "sculpture", "poster"],
            "attributes": {
                "style": ["modern", "abstract", "traditional", "contemporary", "vintage", "minimalist"],
                "material": ["canvas", "wood", "metal", "glass", "ceramic", "paper"],
                "finish": ["glossy", "matte", "textured", "framed", "gallery wrapped"],
                "theme": ["nature", "geometric", "floral", "landscape", "portrait", "abstract"]
            }
        },
        "kitchen": {
            "products": ["pot", "pan", "knife", "utensil", "plate", "bowl", "cup",
                         "appliance", "cutting board", "container"],
            "attributes": {
                "material": ["stainless steel", "aluminum", "ceramic", "glass", "plastic", "wood"],
                "finish": ["non-stick", "stainless", "enameled", "anodized"],
                "type": ["manual", "electric", "dishwasher safe"]
            }
        }
    }
    def __init__(self):
        pass

    @classmethod
    def _get_device(cls):
        """Get optimal device."""
        if cls._device is None:
            cls._device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
            logger.info(f"Visual Processing using device: {cls._device}")
        return cls._device

    @classmethod
    def _get_clip_model(cls):
        """Lazy load CLIP model with class-level caching."""
        if cls._clip_model is None:
            logger.info("Loading CLIP model (this may take a few minutes on first use)...")
            cls._clip_model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32")
            cls._clip_processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")
            device = cls._get_device()
            cls._clip_model.to(device)
            cls._clip_model.eval()
            logger.info("✓ CLIP model loaded successfully")
        return cls._clip_model, cls._clip_processor
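    # Because the weights are cached on the class, every instance shares one
    # loaded model. A sketch of the intended usage pattern:
    #
    #     svc_a = VisualProcessingService()
    #     svc_b = VisualProcessingService()
    #     svc_a.process_image(image_url)  # triggers the one-time CLIP load
    #     svc_b.process_image(image_url)  # reuses the cached model and processor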
    def download_image(self, image_url: str) -> Optional[Image.Image]:
        """Download image from URL."""
        try:
            response = requests.get(image_url, timeout=10)
            response.raise_for_status()
            image = Image.open(BytesIO(response.content)).convert('RGB')
            return image
        except Exception as e:
            logger.error(f"Error downloading image from {image_url}: {str(e)}")
            return None
    def extract_dominant_colors(self, image: Image.Image, n_colors: int = 3) -> List[Dict]:
        """Extract dominant colors using K-means."""
        try:
            # Resize for faster processing
            img_small = image.resize((150, 150))
            img_array = np.array(img_small)
            pixels = img_array.reshape(-1, 3)
            # K-means clustering
            kmeans = KMeans(n_clusters=n_colors, random_state=42, n_init=5)
            kmeans.fit(pixels)
            colors = []
            labels_counts = np.bincount(kmeans.labels_)
            for i, center in enumerate(kmeans.cluster_centers_):
                # Plain Python ints so the result stays JSON-serializable
                rgb = tuple(int(c) for c in center)
                color_name = self._get_color_name_simple(rgb)
                percentage = float(labels_counts[i] / len(kmeans.labels_) * 100)
                colors.append({
                    "name": color_name,
                    "rgb": rgb,
                    "percentage": percentage
                })
            colors.sort(key=lambda x: x['percentage'], reverse=True)
            return colors
        except Exception as e:
            logger.error(f"Error extracting colors: {str(e)}")
            return []
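    # Offline sanity check (a sketch; no CLIP download needed). On a synthetic
    # half-red/half-blue image, K-means with n_colors=2 should report the two
    # tones at roughly 50% each (the exact split depends on clustering):
    #
    #     from PIL import Image
    #     img = Image.new("RGB", (100, 100), (200, 30, 30))   # red-ish fill
    #     img.paste((30, 30, 200), (0, 0, 100, 50))           # blue top half
    #     svc = VisualProcessingService()
    #     print(svc.extract_dominant_colors(img, n_colors=2))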
    def _get_color_name_simple(self, rgb: Tuple[int, int, int]) -> str:
        """
        Simple color name detection without webcolors dependency.
        Maps RGB to basic color names.
        """
        r, g, b = rgb
        # Define basic color ranges
        colors = {
            'black': (r < 50 and g < 50 and b < 50),
            'white': (r > 200 and g > 200 and b > 200),
            'gray': (abs(r - g) < 30 and abs(g - b) < 30 and abs(r - b) < 30 and 50 <= r <= 200),
            'red': (r > 150 and g < 100 and b < 100),
            'green': (g > 150 and r < 100 and b < 100),
            'blue': (b > 150 and r < 100 and g < 100),
            'yellow': (r > 200 and g > 200 and b < 100),
            'orange': (r > 200 and 100 < g < 200 and b < 100),
            'purple': (r > 100 and b > 100 and g < 100),
            'pink': (r > 200 and 100 < g < 200 and 100 < b < 200),
            'brown': (50 < r < 150 and 30 < g < 100 and b < 80),
            'cyan': (r < 100 and g > 150 and b > 150),
        }
        for color_name, condition in colors.items():
            if condition:
                return color_name
        # Default fallback
        if r > g and r > b:
            return 'red'
        elif g > r and g > b:
            return 'green'
        elif b > r and b > g:
            return 'blue'
        else:
            return 'gray'
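    # A few illustrative mappings for the rule table above:
    #
    #     _get_color_name_simple((255, 0, 0))     -> 'red'
    #     _get_color_name_simple((128, 128, 128)) -> 'gray'
    #     _get_color_name_simple((120, 60, 180))  -> 'purple'
    #     _get_color_name_simple((160, 120, 90))  -> 'red' (no rule matches,
    #                                                dominant-channel fallback)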
    def classify_with_clip(
        self,
        image: Image.Image,
        candidates: List[str],
        attribute_name: str,
        confidence_threshold: float = 0.15
    ) -> Dict:
        """Use CLIP to classify image against candidate labels."""
        try:
            model, processor = self._get_clip_model()
            device = self._get_device()
            # Prepare inputs
            inputs = processor(
                text=candidates,
                images=image,
                return_tensors="pt",
                padding=True
            )
            # Move to device
            inputs = {k: v.to(device) for k, v in inputs.items()}
            # Get predictions
            with torch.no_grad():
                outputs = model(**inputs)
                logits_per_image = outputs.logits_per_image
                probs = logits_per_image.softmax(dim=1).cpu()
            # Get top predictions
            top_k = min(3, len(candidates))
            top_probs, top_indices = torch.topk(probs[0], k=top_k)
            results = []
            for prob, idx in zip(top_probs, top_indices):
                if prob.item() > confidence_threshold:
                    results.append({
                        "value": candidates[idx.item()],
                        "confidence": float(prob.item())
                    })
            return {
                "attribute": attribute_name,
                "predictions": results
            }
        except Exception as e:
            logger.error(f"Error in CLIP classification for {attribute_name}: {str(e)}")
            return {"attribute": attribute_name, "predictions": []}
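    # Direct zero-shot use against custom labels (a sketch; the URL is a
    # placeholder, and the first call downloads the CLIP weights):
    #
    #     svc = VisualProcessingService()
    #     img = svc.download_image("https://example.com/shirt.jpg")
    #     if img is not None:
    #         res = svc.classify_with_clip(img, ["striped", "solid color"], "pattern")
    #         print(res["predictions"])  # e.g. [{"value": "striped", "confidence": 0.62}, ...]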
    def detect_product_category(self, image: Image.Image) -> Tuple[str, str, float]:
        """
        First detect which category the product belongs to.
        Returns: (category_name, product_type, confidence)
        """
        # Get all product types from all categories
        all_categories = []
        category_map = {}
        for category, data in self.CATEGORY_ATTRIBUTES.items():
            for product in data["products"]:
                all_categories.append(f"a photo of a {product}")
                category_map[f"a photo of a {product}"] = category
        # Classify
        result = self.classify_with_clip(image, all_categories, "category_detection", confidence_threshold=0.10)
        if result["predictions"]:
            best_match = result["predictions"][0]
            detected_category = category_map[best_match["value"]]
            product_type = best_match["value"].replace("a photo of a ", "")
            confidence = best_match["confidence"]
            logger.info(f"Detected category: {detected_category}, product: {product_type}, confidence: {confidence:.3f}")
            return detected_category, product_type, confidence
        return "unknown", "unknown", 0.0
    def process_image(
        self,
        image_url: str,
        product_type_hint: Optional[str] = None
    ) -> Dict:
        """
        Main method to process image and extract visual attributes.
        Now dynamically detects product category first.
        """
        import time
        start_time = time.time()
        try:
            # Download image
            image = self.download_image(image_url)
            if image is None:
                return {
                    "visual_attributes": {},
                    "error": "Failed to download image"
                }
            visual_attributes = {}
            detailed_predictions = {}
            # Step 1: Detect product category
            detected_category, detected_product_type, category_confidence = self.detect_product_category(image)
            # If confidence is too low, return minimal info
            if category_confidence < 0.10:
                logger.warning(f"Low confidence in category detection ({category_confidence:.3f}). Returning basic attributes only.")
                colors = self.extract_dominant_colors(image, n_colors=3)
                if colors:
                    visual_attributes["primary_color"] = colors[0]["name"]
                    visual_attributes["color_palette"] = [c["name"] for c in colors]
                return {
                    "visual_attributes": visual_attributes,
                    "category_confidence": category_confidence,
                    "processing_time": round(time.time() - start_time, 2)
                }
            # Add detected product type
            visual_attributes["product_type"] = detected_product_type
            visual_attributes["category"] = detected_category
            # Step 2: Extract color (universal attribute)
            colors = self.extract_dominant_colors(image, n_colors=3)
            if colors:
                visual_attributes["primary_color"] = colors[0]["name"]
                visual_attributes["color_palette"] = [c["name"] for c in colors]
            # Step 3: Extract category-specific attributes
            if detected_category in self.CATEGORY_ATTRIBUTES:
                category_config = self.CATEGORY_ATTRIBUTES[detected_category]
                for attr_name, attr_values in category_config["attributes"].items():
                    # Use higher confidence threshold for category-specific attributes
                    result = self.classify_with_clip(image, attr_values, attr_name, confidence_threshold=0.20)
                    if result["predictions"]:
                        # Only add if confidence is reasonable
                        best_prediction = result["predictions"][0]
                        if best_prediction["confidence"] > 0.20:
                            visual_attributes[attr_name] = best_prediction["value"]
                            detailed_predictions[attr_name] = result
            processing_time = time.time() - start_time
            return {
                "visual_attributes": visual_attributes,
                "detailed_predictions": detailed_predictions,
                "category_confidence": category_confidence,
                "processing_time": round(processing_time, 2)
            }
        except Exception as e:
            logger.error(f"Error processing image: {str(e)}")
            return {
                "visual_attributes": {},
                "error": str(e),
                "processing_time": round(time.time() - start_time, 2)
            }
# # ==================== visual_processing_service_enhanced.py ====================
# """
# Enhanced Visual Processing Service combining CLIP's speed with a BLIP-2-style comprehensive taxonomy.
# Features:
# - Fast CLIP-based classification
# - 19 product categories covering 150+ product types
# - Two-stage classification with validation
# - Enhanced color normalization
# - Category-specific attribute detection
# - Confidence-based fallback mechanisms
# - Optional center cropping for better focus
# Usage:
#     service = VisualProcessingService()
#     result = service.process_image("https://example.com/product.jpg")
# """
# import torch
# import cv2
# import numpy as np
# import requests
# from io import BytesIO
# from PIL import Image
# from typing import Dict, List, Optional, Tuple
# import logging
# from transformers import CLIPProcessor, CLIPModel
# from sklearn.cluster import KMeans
#
# logger = logging.getLogger(__name__)
#
# class VisualProcessingService:
#     """Enhanced service for extracting visual attributes from product images using CLIP."""
#
#     # Class-level caching (shared across instances)
#     _clip_model = None
#     _clip_processor = None
#     _device = None
#
#     # ==================== EXPANDED TAXONOMY ====================
#     # Base color vocabulary
#     COLORS = ["black", "white", "red", "blue", "green", "yellow", "gray",
#               "brown", "pink", "purple", "orange", "beige", "navy", "teal"]
#     # Pattern vocabulary
#     PATTERNS = ["solid", "striped", "checked", "plaid", "floral", "graphic",
#                 "polka dot", "camo", "tie-dye", "abstract", "geometric"]
#     # Material vocabulary (extended)
#     MATERIALS = ["cotton", "polyester", "denim", "leather", "wool", "canvas",
#                  "silicone", "metal", "fabric", "rubber", "plastic", "wood",
#                  "glass", "ceramic", "steel", "foam", "aluminum", "carbon fiber"]
#     # Style vocabulary
#     STYLES = ["casual", "formal", "sporty", "streetwear", "elegant", "vintage",
#               "modern", "bohemian", "minimalist", "industrial", "rustic", "contemporary"]
#     # Fit vocabulary
#     FITS = ["slim fit", "regular fit", "loose fit", "oversized", "tailored",
#             "relaxed", "athletic fit"]
#     # Brand vocabulary (common marketplace brands)
#     BRANDS = ["nike", "adidas", "sony", "samsung", "apple", "generic", "lego",
#               "hasbro", "lg", "panasonic", "microsoft"]
#     # Age group vocabulary
#     AGE_GROUPS = ["baby", "toddler", "child", "teen", "adult", "all ages"]
#
#     # Comprehensive category-specific attributes
#     CATEGORY_ATTRIBUTES = {
#         # ==================== CLOTHING ====================
#         "clothing": {
#             "products": ["t-shirt", "shirt", "dress", "pants", "jeans", "shorts",
#                          "skirt", "jacket", "coat", "sweater", "hoodie", "top",
#                          "blouse", "cardigan", "blazer"],
#             "attributes": {
#                 "color": COLORS,
#                 "pattern": PATTERNS,
#                 "material": ["cotton", "polyester", "denim", "leather", "silk",
#                              "wool", "linen", "blend", "canvas"],
#                 "style": STYLES,
#                 "fit": FITS,
#                 "neckline": ["crew neck", "v-neck", "round neck", "collar",
#                              "scoop neck", "boat neck", "turtleneck"],
#                 "sleeve_type": ["short sleeve", "long sleeve", "sleeveless",
#                                 "3/4 sleeve", "cap sleeve"],
#                 "closure_type": ["button", "zipper", "pull-on", "snap", "tie", "buckle"]
#             }
#         },
#         # ==================== FOOTWEAR ====================
#         "footwear": {
#             "products": ["shoes", "sneakers", "sandals", "boots", "slippers",
#                          "heels", "loafers"],
#             "attributes": {
#                 "color": COLORS,
#                 "material": ["leather", "synthetic", "canvas", "rubber", "suede", "fabric"],
#                 "type": ["sneakers", "sandals", "formal", "boots", "sports", "casual"],
#                 "style": STYLES,
#                 "closure_type": ["lace-up", "slip-on", "velcro", "zipper", "buckle"]
#             }
#         },
#         # ==================== ACCESSORIES ====================
#         "accessories": {
#             "products": ["watch", "bag", "backpack", "handbag", "wallet", "belt",
#                          "sunglasses", "hat", "scarf"],
#             "attributes": {
#                 "color": COLORS,
#                 "material": ["leather", "fabric", "metal", "plastic", "canvas", "synthetic"],
#                 "style": STYLES,
#                 "type": ["backpack", "tote", "crossbody", "messenger", "duffel"]
#             }
#         },
#         # ==================== JEWELRY ====================
#         "jewelry": {
#             "products": ["necklace", "ring", "bracelet", "earrings", "pendant", "chain"],
#             "attributes": {
#                 "material": ["gold", "silver", "platinum", "stainless steel",
#                              "plastic", "beads", "leather"],
#                 "style": ["modern", "vintage", "minimalist", "statement", "elegant"],
#                 "type": ["chain", "band", "solitaire", "hoop", "stud"]
#             }
#         },
#         # ==================== ELECTRONICS ====================
#         "electronics": {
#             "products": ["phone", "smartphone", "tablet", "laptop", "headphones",
#                          "camera", "tv", "monitor", "keyboard", "mouse", "speaker",
#                          "smartwatch", "charger"],
#             "attributes": {
#                 "color": COLORS,
#                 "material": ["plastic", "metal", "glass", "aluminum", "rubber"],
#                 "style": ["modern", "minimalist", "sleek", "industrial"],
#                 "finish": ["matte", "glossy", "metallic", "textured"],
#                 "type": ["over-ear", "in-ear", "on-ear", "wireless", "wired"],
#                 "brand": BRANDS
#             }
#         },
#         # ==================== FURNITURE ====================
#         "furniture": {
#             "products": ["chair", "table", "sofa", "bed", "desk", "shelf",
#                          "cabinet", "dresser", "bench", "stool", "bookshelf"],
#             "attributes": {
#                 "color": COLORS,
#                 "material": ["wood", "metal", "glass", "plastic", "fabric", "leather"],
#                 "style": ["modern", "traditional", "industrial", "rustic",
#                           "contemporary", "vintage", "minimalist"],
#                 "finish": ["natural wood", "painted", "stained", "laminated", "upholstered"]
#             }
#         },
#         # ==================== HOME DECOR ====================
#         "home_decor": {
#             "products": ["painting", "canvas", "wall art", "frame", "vase", "lamp",
#                          "mirror", "clock", "sculpture", "poster", "cushion", "rug"],
#             "attributes": {
#                 "color": COLORS,
#                 "style": ["modern", "abstract", "traditional", "contemporary",
#                           "vintage", "minimalist", "bohemian"],
#                 "material": ["canvas", "wood", "metal", "glass", "ceramic", "paper", "fabric"],
#                 "finish": ["glossy", "matte", "textured", "framed"],
#                 "theme": ["nature", "geometric", "floral", "landscape", "abstract"]
#             }
#         },
#         # ==================== KITCHEN ====================
#         "kitchen": {
#             "products": ["pot", "pan", "knife", "utensil", "plate", "bowl", "cup",
#                          "mug", "bottle", "container", "cutting board"],
#             "attributes": {
#                 "material": ["stainless steel", "aluminum", "ceramic", "glass",
#                              "plastic", "wood", "silicone"],
#                 "finish": ["non-stick", "stainless", "enameled", "anodized"],
#                 "type": ["frypan", "saucepan", "chef knife", "utility", "mixing"]
#             }
#         },
#         # ==================== APPLIANCES ====================
#         "appliances": {
#             "products": ["microwave", "blender", "vacuum", "fan", "toaster",
#                          "coffee maker", "iron", "hair dryer"],
#             "attributes": {
#                 "color": COLORS,
#                 "type": ["upright", "robot", "handheld", "ceiling", "table", "tower"],
#                 "power": ["low", "medium", "high", "variable"],
#                 "brand": BRANDS
#             }
#         },
#         # ==================== BEAUTY & PERSONAL CARE ====================
#         "beauty": {
#             "products": ["lipstick", "perfume", "lotion", "hair dryer", "makeup",
#                          "skincare", "nail polish", "shampoo"],
#             "attributes": {
#                 "color": COLORS,
#                 "type": ["eau de parfum", "eau de toilette", "body spray",
#                          "body lotion", "face cream"],
#                 "finish": ["matte", "glossy", "satin", "shimmer"]
#             }
#         },
#         # ==================== TOYS ====================
#         "toys": {
#             "products": ["doll", "puzzle", "board game", "action figure", "plush toy",
#                          "toy car", "lego", "building blocks"],
#             "attributes": {
#                 "color": COLORS,
#                 "age_group": AGE_GROUPS,
#                 "material": ["plastic", "wood", "fabric", "metal", "foam"],
#                 "type": ["educational", "plush", "action", "vehicle", "puzzle", "board game"],
#                 "brand": BRANDS
#             }
#         },
#         # ==================== SPORTS & OUTDOOR ====================
#         "sports": {
#             "products": ["bicycle", "football", "basketball", "tennis racket",
#                          "yoga mat", "helmet", "skateboard", "dumbbells", "ball"],
#             "attributes": {
#                 "color": COLORS,
#                 "material": ["steel", "aluminum", "carbon fiber", "rubber",
#                              "leather", "synthetic", "foam", "composite"],
#                 "sport_type": ["football", "basketball", "tennis", "cycling",
#                                "yoga", "gym", "outdoor", "fitness"],
#                 "type": ["mountain", "road", "hybrid", "bmx", "indoor", "outdoor"],
#                 "brand": BRANDS
#             }
#         },
#         # ==================== PET SUPPLIES ====================
#         "pet_supplies": {
#             "products": ["pet bed", "pet toy", "leash", "pet bowl", "collar",
#                          "pet carrier"],
#             "attributes": {
#                 "color": COLORS,
#                 "material": ["fabric", "plastic", "metal", "nylon", "leather"],
#                 "size": ["small", "medium", "large", "extra large"]
#             }
#         },
#         # ==================== BABY PRODUCTS ====================
#         "baby": {
#             "products": ["stroller", "baby bottle", "diaper", "crib", "high chair",
#                          "baby carrier"],
#             "attributes": {
#                 "color": COLORS,
#                 "material": MATERIALS,
#                 "type": ["full-size", "umbrella", "jogging", "disposable", "cloth"],
#                 "age_group": ["newborn", "baby", "toddler"]
#             }
#         },
#         # ==================== TOOLS & HARDWARE ====================
#         "tools": {
#             "products": ["hammer", "drill", "screwdriver", "wrench", "saw",
#                          "pliers", "measuring tape", "level"],
#             "attributes": {
#                 "material": ["steel", "aluminum", "plastic", "wood", "rubber",
#                              "chrome", "fiberglass"],
#                 "type": ["manual", "electric", "cordless", "corded", "pneumatic"],
#                 "finish": ["chrome plated", "powder coated", "stainless steel"],
#                 "brand": BRANDS
#             }
#         },
#         # ==================== BOOKS & MEDIA ====================
#         "books_media": {
#             "products": ["book", "magazine", "dvd", "video game", "cd", "vinyl"],
#             "attributes": {
#                 "type": ["paperback", "hardcover", "ebook", "audiobook"],
#                 "genre": ["fiction", "non-fiction", "educational", "kids",
#                           "action", "adventure", "sports", "rpg"]
#             }
#         },
#         # ==================== AUTOMOTIVE ====================
#         "automotive": {
#             "products": ["car accessory", "tire", "car seat", "steering wheel cover",
#                          "floor mat"],
#             "attributes": {
#                 "color": COLORS,
#                 "material": ["rubber", "plastic", "fabric", "leather", "vinyl"],
#                 "type": ["universal", "custom fit"]
#             }
#         },
#         # ==================== OFFICE SUPPLIES ====================
#         "office": {
#             "products": ["pen", "notebook", "folder", "desk organizer", "stapler",
#                          "calculator", "paper"],
#             "attributes": {
#                 "color": COLORS,
#                 "material": ["paper", "plastic", "metal", "cardboard"],
#                 "type": ["ruled", "blank", "grid", "dot grid"]
#             }
#         },
#         # ==================== GARDEN & OUTDOOR ====================
#         "garden": {
#             "products": ["plant pot", "garden tool", "watering can", "planter",
#                          "garden hose", "lawn mower"],
#             "attributes": {
#                 "color": COLORS,
#                 "material": ["ceramic", "plastic", "metal", "terracotta", "wood"],
#                 "type": ["indoor", "outdoor", "hanging", "standing"]
#             }
#         }
#     }
#
#     # Attribute-specific confidence thresholds
#     CONFIDENCE_THRESHOLDS = {
#         "color": 0.20,
#         "pattern": 0.25,
#         "material": 0.30,
#         "style": 0.20,
#         "fit": 0.25,
#         "brand": 0.40,
#         "type": 0.22,
#         "finish": 0.28,
#         "neckline": 0.23,
#         "sleeve_type": 0.23
#     }
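#     # Note the lookup fallback used throughout: attributes without an entry
#     # default to 0.20, e.g.
#     #     CONFIDENCE_THRESHOLDS.get("material", 0.20)  -> 0.30
#     #     CONFIDENCE_THRESHOLDS.get("theme", 0.20)     -> 0.20  (no entry)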
#     def __init__(self):
#         pass
#
#     @classmethod
#     def _get_device(cls):
#         """Get optimal device."""
#         if cls._device is None:
#             cls._device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
#             logger.info(f"Visual Processing using device: {cls._device}")
#         return cls._device
#
#     @classmethod
#     def _get_clip_model(cls):
#         """Lazy load CLIP model with class-level caching."""
#         if cls._clip_model is None:
#             logger.info("Loading CLIP model (this may take a few minutes on first use)...")
#             cls._clip_model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32")
#             cls._clip_processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")
#             device = cls._get_device()
#             cls._clip_model.to(device)
#             cls._clip_model.eval()
#             logger.info("✓ CLIP model loaded successfully")
#         return cls._clip_model, cls._clip_processor
#
#     def center_crop(self, image: Image.Image, rel_crop: float = 0.7) -> Image.Image:
#         """
#         Center-crop to focus on the product area if there is too much background.
#         Args:
#             image: PIL Image
#             rel_crop: Relative crop size (0.7 = 70% of min dimension)
#         """
#         w, h = image.size
#         side = int(min(w, h) * rel_crop)
#         left = (w - side) // 2
#         top = (h - side) // 2
#         return image.crop((left, top, left + side, top + side))
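#     # Worked example of the crop arithmetic: a 1000x800 image with
#     # rel_crop=0.7 keeps a 560x560 box, since side = int(800 * 0.7) = 560,
#     # left = (1000 - 560) // 2 = 220 and top = (800 - 560) // 2 = 120.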
#     def download_image(self, image_url: str, apply_crop: bool = False,
#                        max_size: Tuple[int, int] = (1024, 1024)) -> Optional[Image.Image]:
#         """
#         Download image from URL with optional preprocessing.
#         Args:
#             image_url: URL of the image
#             apply_crop: Whether to apply center crop
#             max_size: Maximum dimensions for resizing
#         """
#         try:
#             response = requests.get(image_url, timeout=10)
#             response.raise_for_status()
#             image = Image.open(BytesIO(response.content)).convert('RGB')
#             # Resize if too large
#             if image.size[0] > max_size[0] or image.size[1] > max_size[1]:
#                 image.thumbnail(max_size, Image.Resampling.LANCZOS)
#             # Optional center crop
#             if apply_crop:
#                 image = self.center_crop(image, rel_crop=0.7)
#             return image
#         except Exception as e:
#             logger.error(f"Error downloading image from {image_url}: {str(e)}")
#             return None
#     def normalize_color(self, word: str) -> str:
#         """
#         Enhanced color normalization with aliases and modifiers.
#         Args:
#             word: Color word to normalize
#         """
#         w = word.lower().strip()
#         # Remove light/dark modifiers
#         w = w.replace("light ", "").replace("dark ", "")
#         w = w.replace("bright ", "").replace("pale ", "")
#         # Alias mapping
#         aliases = {
#             "grey": "gray",
#             "navy": "blue",
#             "navy blue": "blue",
#             "maroon": "red",
#             "crimson": "red",
#             "scarlet": "red",
#             "teal": "green",
#             "turquoise": "blue",
#             "cyan": "blue",
#             "indigo": "blue",
#             "violet": "purple",
#             "lavender": "purple",
#             "magenta": "pink",
#             "off white": "white",
#             "off-white": "white",
#             "cream": "beige",
#             "ivory": "white",
#             "khaki": "beige",
#             "tan": "brown",
#             "bronze": "brown",
#             "gold": "yellow",
#             "silver": "gray",
#             "charcoal": "gray"
#         }
#         normalized = aliases.get(w, w)
#         # Validate against canonical colors
#         if normalized not in [c.lower() for c in self.COLORS]:
#             # Try first word if it's a compound
#             first_word = normalized.split()[0] if ' ' in normalized else normalized
#             if first_word in [c.lower() for c in self.COLORS]:
#                 return first_word
#         return normalized
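#     # Illustrative normalizations under the alias table above:
#     #     normalize_color("Dark Navy Blue") -> "blue"   (modifier stripped, alias hit)
#     #     normalize_color("charcoal")       -> "gray"
#     #     normalize_color("off-white")      -> "white"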
#     def extract_dominant_colors(self, image: Image.Image, n_colors: int = 3) -> List[Dict]:
#         """Extract dominant colors using K-means clustering."""
#         try:
#             # Resize for faster processing
#             img_small = image.resize((150, 150))
#             img_array = np.array(img_small)
#             pixels = img_array.reshape(-1, 3)
#             # Sample if too many pixels
#             if len(pixels) > 10000:
#                 indices = np.random.choice(len(pixels), 10000, replace=False)
#                 pixels = pixels[indices]
#             # K-means clustering
#             kmeans = KMeans(n_clusters=n_colors, random_state=42, n_init=5, max_iter=100)
#             kmeans.fit(pixels)
#             colors = []
#             labels_counts = np.bincount(kmeans.labels_)
#             for i, center in enumerate(kmeans.cluster_centers_):
#                 rgb = tuple(center.astype(int))
#                 color_name = self._get_color_name_simple(rgb)
#                 percentage = float(labels_counts[i] / len(kmeans.labels_) * 100)
#                 colors.append({
#                     "name": color_name,
#                     "rgb": rgb,
#                     "percentage": percentage
#                 })
#             colors.sort(key=lambda x: x['percentage'], reverse=True)
#             return colors
#         except Exception as e:
#             logger.error(f"Error extracting colors: {str(e)}")
#             return []
#
#     def _get_color_name_simple(self, rgb: Tuple[int, int, int]) -> str:
#         """Simple RGB to color name mapping."""
#         r, g, b = rgb
#         # Define color ranges
#         colors = {
#             'black': (r < 50 and g < 50 and b < 50),
#             'white': (r > 200 and g > 200 and b > 200),
#             'gray': (abs(r - g) < 30 and abs(g - b) < 30 and abs(r - b) < 30 and 50 <= r <= 200),
#             'red': (r > 150 and g < 100 and b < 100),
#             'green': (g > 150 and r < 100 and b < 100),
#             'blue': (b > 150 and r < 100 and g < 100),
#             'yellow': (r > 200 and g > 200 and b < 100),
#             'orange': (r > 200 and 100 < g < 200 and b < 100),
#             'purple': (r > 100 and b > 100 and g < 100),
#             'pink': (r > 200 and 100 < g < 200 and 100 < b < 200),
#             'brown': (50 < r < 150 and 30 < g < 100 and b < 80),
#             'beige': (150 < r < 220 and 140 < g < 200 and 100 < b < 180),
#         }
#         for color_name, condition in colors.items():
#             if condition:
#                 return color_name
#         # Fallback based on dominant channel
#         if r > g and r > b:
#             return 'red'
#         elif g > r and g > b:
#             return 'green'
#         elif b > r and b > g:
#             return 'blue'
#         else:
#             return 'gray'
#     def classify_with_clip(
#         self,
#         image: Image.Image,
#         candidates: List[str],
#         attribute_name: str,
#         confidence_threshold: Optional[float] = None
#     ) -> Dict:
#         """
#         Use CLIP to classify image against candidate labels.
#         Args:
#             image: PIL Image
#             candidates: List of text labels to classify against
#             attribute_name: Name of the attribute being classified
#             confidence_threshold: Override default threshold
#         """
#         try:
#             model, processor = self._get_clip_model()
#             device = self._get_device()
#             # Use attribute-specific threshold if not provided
#             if confidence_threshold is None:
#                 confidence_threshold = self.CONFIDENCE_THRESHOLDS.get(attribute_name, 0.20)
#             # Prepare inputs
#             inputs = processor(
#                 text=candidates,
#                 images=image,
#                 return_tensors="pt",
#                 padding=True
#             )
#             # Move to device
#             inputs = {k: v.to(device) for k, v in inputs.items()}
#             # Get predictions
#             with torch.no_grad():
#                 outputs = model(**inputs)
#                 logits_per_image = outputs.logits_per_image
#                 probs = logits_per_image.softmax(dim=1).cpu()
#             # Get top predictions
#             top_k = min(3, len(candidates))
#             top_probs, top_indices = torch.topk(probs[0], k=top_k)
#             results = []
#             for prob, idx in zip(top_probs, top_indices):
#                 if prob.item() > confidence_threshold:
#                     value = candidates[idx.item()]
#                     # Apply color normalization if color attribute
#                     if attribute_name == "color":
#                         value = self.normalize_color(value)
#                     results.append({
#                         "value": value,
#                         "confidence": float(prob.item())
#                     })
#             return {
#                 "attribute": attribute_name,
#                 "predictions": results
#             }
#         except Exception as e:
#             logger.error(f"Error in CLIP classification for {attribute_name}: {str(e)}")
#             return {"attribute": attribute_name, "predictions": []}
#     def detect_category_hierarchical(self, image: Image.Image) -> Tuple[str, str, float]:
#         """
#         Two-stage hierarchical product detection:
#         1. Detect broad category
#         2. Detect specific product within that category
#         Returns:
#             (category, product_type, confidence)
#         """
#         # Stage 1: Detect broad category
#         category_names = list(self.CATEGORY_ATTRIBUTES.keys())
#         category_labels = [f"a photo of {cat.replace('_', ' ')}" for cat in category_names]
#         category_result = self.classify_with_clip(
#             image, category_labels, "category_detection", confidence_threshold=0.15
#         )
#         if not category_result["predictions"]:
#             return "unknown", "unknown", 0.0
#         # Extract category
#         best_category_match = category_result["predictions"][0]
#         detected_category = category_names[category_labels.index(best_category_match["value"])]
#         category_confidence = best_category_match["confidence"]
#         # Stage 2: Detect specific product within category
#         products_in_category = self.CATEGORY_ATTRIBUTES[detected_category]["products"]
#         product_labels = [f"a photo of a {p}" for p in products_in_category]
#         product_result = self.classify_with_clip(
#             image, product_labels, "product_detection", confidence_threshold=0.15
#         )
#         if product_result["predictions"]:
#             best_product = product_result["predictions"][0]
#             product_type = products_in_category[product_labels.index(best_product["value"])]
#             product_confidence = best_product["confidence"]
#             # Combined confidence (geometric mean for balance)
#             combined_confidence = (category_confidence * product_confidence) ** 0.5
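#             # e.g. stage confidences 0.60 and 0.40 combine to
#             # (0.60 * 0.40) ** 0.5 ≈ 0.49, a softer penalty than the raw
#             # product 0.24 while still reflecting the weaker stage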
#             logger.info(f"Detected: {detected_category} → {product_type} (confidence: {combined_confidence:.3f})")
#             return detected_category, product_type, combined_confidence
#         return detected_category, "unknown", category_confidence * 0.5
#
#     def detect_category_flat(self, image: Image.Image) -> Tuple[str, str, float]:
#         """
#         Single-stage flat product detection across all categories.
#         Faster but potentially less accurate.
#         Returns:
#             (category, product_type, confidence)
#         """
#         # Collect all products with their categories
#         all_products = []
#         product_to_category = {}
#         for category, data in self.CATEGORY_ATTRIBUTES.items():
#             for product in data["products"]:
#                 label = f"a photo of a {product}"
#                 all_products.append(label)
#                 product_to_category[label] = category
#         # Classify
#         result = self.classify_with_clip(
#             image, all_products, "product_detection", confidence_threshold=0.15
#         )
#         if result["predictions"]:
#             best_match = result["predictions"][0]
#             product_label = best_match["value"]
#             category = product_to_category[product_label]
#             product_type = product_label.replace("a photo of a ", "")
#             confidence = best_match["confidence"]
#             logger.info(f"Detected: {category} → {product_type} (confidence: {confidence:.3f})")
#             return category, product_type, confidence
#         return "unknown", "unknown", 0.0
#     def process_image(
#         self,
#         image_url: str,
#         product_type_hint: Optional[str] = None,
#         apply_crop: bool = False,
#         detection_mode: str = "hierarchical"
#     ) -> Dict:
#         """
#         Main method to process image and extract visual attributes.
#         Args:
#             image_url: URL of the product image
#             product_type_hint: Optional hint about product type
#             apply_crop: Whether to apply center crop for better focus
#             detection_mode: "hierarchical" (slower, more accurate) or "flat" (faster)
#         """
#         import time
#         start_time = time.time()
#         try:
#             # Download image
#             image = self.download_image(image_url, apply_crop=apply_crop)
#             if image is None:
#                 return {
#                     "visual_attributes": {},
#                     "error": "Failed to download image"
#                 }
#             visual_attributes = {}
#             detailed_predictions = {}
#             # Step 1: Detect product category and type
#             if detection_mode == "hierarchical":
#                 detected_category, detected_product_type, category_confidence = \
#                     self.detect_category_hierarchical(image)
#             else:
#                 detected_category, detected_product_type, category_confidence = \
#                     self.detect_category_flat(image)
#             # If confidence is too low, return minimal info
#             if category_confidence < 0.12:
#                 logger.warning(f"Low confidence ({category_confidence:.3f}). Returning basic attributes only.")
#                 colors = self.extract_dominant_colors(image, n_colors=3)
#                 if colors:
#                     visual_attributes["primary_color"] = colors[0]["name"]
#                     visual_attributes["color_palette"] = [c["name"] for c in colors]
#                 return {
#                     "visual_attributes": visual_attributes,
#                     "category_confidence": category_confidence,
#                     "processing_time": round(time.time() - start_time, 2),
#                     "warning": "Low confidence detection"
#                 }
#             # Add detected information
#             visual_attributes["product_type"] = detected_product_type
#             visual_attributes["category"] = detected_category
#             visual_attributes["detection_confidence"] = round(category_confidence, 3)
#             # Step 2: Extract universal color attribute
#             colors = self.extract_dominant_colors(image, n_colors=3)
#             if colors:
#                 visual_attributes["primary_color"] = colors[0]["name"]
#                 visual_attributes["color_palette"] = [c["name"] for c in colors]
#                 visual_attributes["color_distribution"] = [
#                     {"name": c["name"], "percentage": round(c["percentage"], 1)}
#                     for c in colors
#                 ]
#             # Step 3: Extract category-specific attributes
#             if detected_category in self.CATEGORY_ATTRIBUTES:
#                 category_config = self.CATEGORY_ATTRIBUTES[detected_category]
#                 for attr_name, attr_values in category_config["attributes"].items():
#                     # Skip color since we already extracted it
#                     if attr_name == "color":
#                         continue
#                     # Get attribute-specific threshold
#                     threshold = self.CONFIDENCE_THRESHOLDS.get(attr_name, 0.20)
#                     # Classify
#                     result = self.classify_with_clip(
#                         image, attr_values, attr_name, confidence_threshold=threshold
#                     )
#                     detailed_predictions[attr_name] = result
#                     # Only add if confidence is reasonable
#                     if result["predictions"]:
#                         best_prediction = result["predictions"][0]
#                         if best_prediction["confidence"] > threshold:
#                             visual_attributes[attr_name] = best_prediction["value"]
#             processing_time = time.time() - start_time
#             return {
#                 "visual_attributes": visual_attributes,
#                 "detailed_predictions": detailed_predictions,
#                 "detection_confidence": round(category_confidence, 3),
#                 "processing_time": round(processing_time, 2),
#                 "metadata": {
#                     "detection_mode": detection_mode,
#                     "crop_applied": apply_crop,
#                     "image_size": image.size
#                 }
#             }
#         except Exception as e:
#             logger.error(f"Error processing image: {str(e)}")
#             import traceback
#             traceback.print_exc()
#             return {
#                 "visual_attributes": {},
#                 "error": str(e),
#                 "processing_time": round(time.time() - start_time, 2)
#             }
#     def batch_process_images(
#         self,
#         image_urls: List[str],
#         detection_mode: str = "flat"
#     ) -> List[Dict]:
#         """
#         Process multiple images in batch.
#         Args:
#             image_urls: List of image URLs
#             detection_mode: Detection mode to use
#         """
#         results = []
#         for i, url in enumerate(image_urls):
#             logger.info(f"Processing image {i+1}/{len(image_urls)}: {url}")
#             result = self.process_image(url, detection_mode=detection_mode)
#             results.append(result)
#         return results
#
#     @classmethod
#     def cleanup_models(cls):
#         """Free up memory by unloading models."""
#         if cls._clip_model is not None:
#             del cls._clip_model
#             del cls._clip_processor
#             cls._clip_model = None
#             cls._clip_processor = None
#             if torch.cuda.is_available():
#                 torch.cuda.empty_cache()
#             logger.info("Models unloaded and memory freed")
#
#     def get_supported_categories(self) -> List[str]:
#         """Get list of all supported product categories."""
#         return list(self.CATEGORY_ATTRIBUTES.keys())
#
#     def get_category_products(self, category: str) -> List[str]:
#         """Get list of products in a specific category."""
#         return self.CATEGORY_ATTRIBUTES.get(category, {}).get("products", [])
#
#     def get_category_attributes(self, category: str) -> Dict[str, List[str]]:
#         """Get attribute schema for a specific category."""
#         return self.CATEGORY_ATTRIBUTES.get(category, {}).get("attributes", {})
#
#     def get_statistics(self) -> Dict:
#         """Get statistics about the taxonomy."""
#         total_products = sum(
#             len(data["products"])
#             for data in self.CATEGORY_ATTRIBUTES.values()
#         )
#         total_attributes = sum(
#             len(data["attributes"])
#             for data in self.CATEGORY_ATTRIBUTES.values()
#         )
#         return {
#             "total_categories": len(self.CATEGORY_ATTRIBUTES),
#             "total_products": total_products,
#             "total_unique_attributes": len(set(
#                 attr
#                 for data in self.CATEGORY_ATTRIBUTES.values()
#                 for attr in data["attributes"].keys()
#             )),
#             "categories": list(self.CATEGORY_ATTRIBUTES.keys())
#         }
# # ==================== USAGE EXAMPLES ====================
# def example_basic_usage():
#     """Basic usage example."""
#     print("=== Basic Usage Example ===\n")
#     # Initialize service
#     service = VisualProcessingService()
#     # Process single image (hierarchical mode - more accurate)
#     result = service.process_image(
#         "https://example.com/product.jpg",
#         detection_mode="hierarchical"
#     )
#     print("Product Type:", result["visual_attributes"].get("product_type"))
#     print("Category:", result["visual_attributes"].get("category"))
#     print("Primary Color:", result["visual_attributes"].get("primary_color"))
#     print("Detection Confidence:", result.get("detection_confidence"))
#     print("Processing Time:", result["processing_time"], "seconds")
#     print("\nAll Attributes:")
#     for key, value in result["visual_attributes"].items():
#         print(f"  {key}: {value}")
#
# def example_fast_mode():
#     """Fast processing mode example."""
#     print("\n=== Fast Mode Example ===\n")
#     service = VisualProcessingService()
#     # Fast mode (flat detection)
#     result = service.process_image(
#         "https://example.com/product.jpg",
#         detection_mode="flat"  # Faster, single-stage detection
#     )
#     print("Processing Time:", result["processing_time"], "seconds")
#     print("Detected:", result["visual_attributes"])
#
# def example_with_cropping():
#     """Example with center cropping for busy backgrounds."""
#     print("\n=== With Center Cropping ===\n")
#     service = VisualProcessingService()
#     # Apply center crop to focus on product
#     result = service.process_image(
#         "https://example.com/product-with-background.jpg",
#         apply_crop=True,  # Enable center cropping
#         detection_mode="hierarchical"
#     )
#     print("Crop Applied:", result["metadata"]["crop_applied"])
#     print("Detected:", result["visual_attributes"])
#
# def example_batch_processing():
#     """Batch processing example."""
#     print("\n=== Batch Processing ===\n")
#     service = VisualProcessingService()
#     image_urls = [
#         "https://example.com/product1.jpg",
#         "https://example.com/product2.jpg",
#         "https://example.com/product3.jpg"
#     ]
#     results = service.batch_process_images(image_urls, detection_mode="flat")
#     for i, result in enumerate(results):
#         print(f"\nProduct {i+1}:")
#         print(f"  Type: {result['visual_attributes'].get('product_type')}")
#         print(f"  Category: {result['visual_attributes'].get('category')}")
#         print(f"  Time: {result['processing_time']}s")
#
# def example_category_info():
#     """Get information about supported categories."""
#     print("\n=== Category Information ===\n")
#     service = VisualProcessingService()
#     # Get statistics
#     stats = service.get_statistics()
#     print("Statistics:")
#     print(f"  Total Categories: {stats['total_categories']}")
#     print(f"  Total Products: {stats['total_products']}")
#     print(f"  Unique Attributes: {stats['total_unique_attributes']}")
#     # Get all categories
#     categories = service.get_supported_categories()
#     print(f"\nSupported Categories ({len(categories)}):")
#     for cat in categories:
#         products = service.get_category_products(cat)
#         print(f"  {cat}: {len(products)} products")
#     # Get attributes for a specific category
#     print("\nClothing Category Attributes:")
#     clothing_attrs = service.get_category_attributes("clothing")
#     for attr, values in clothing_attrs.items():
#         print(f"  {attr}: {len(values)} options")
#
# def example_detailed_predictions():
#     """Example showing detailed predictions with confidence scores."""
#     print("\n=== Detailed Predictions ===\n")
#     service = VisualProcessingService()
#     result = service.process_image(
#         "https://example.com/product.jpg",
#         detection_mode="hierarchical"
#     )
#     print("Visual Attributes (Best Predictions):")
#     for key, value in result["visual_attributes"].items():
#         print(f"  {key}: {value}")
#     print("\nDetailed Predictions (Top 3 for each attribute):")
#     for attr_name, predictions in result.get("detailed_predictions", {}).items():
#         print(f"\n  {attr_name}:")
#         for pred in predictions.get("predictions", []):
#             print(f"    - {pred['value']}: {pred['confidence']:.3f}")
#
# def example_color_distribution():
#     """Example showing color palette extraction."""
#     print("\n=== Color Distribution ===\n")
#     service = VisualProcessingService()
#     result = service.process_image("https://example.com/product.jpg")
#     print("Primary Color:", result["visual_attributes"].get("primary_color"))
#     print("\nColor Palette:")
#     for color in result["visual_attributes"].get("color_palette", []):
#         print(f"  - {color}")
#     print("\nColor Distribution:")
#     for color_info in result["visual_attributes"].get("color_distribution", []):
#         print(f"  {color_info['name']}: {color_info['percentage']}%")
#
# def example_error_handling():
#     """Example showing error handling."""
#     print("\n=== Error Handling ===\n")
#     service = VisualProcessingService()
#     # Invalid URL
#     result = service.process_image("https://invalid-url.com/nonexistent.jpg")
#     if "error" in result:
#         print("Error occurred:", result["error"])
#     else:
#         print("Processing successful")
#     # Low confidence warning
#     result = service.process_image("https://example.com/ambiguous-product.jpg")
#     if "warning" in result:
#         print("Warning:", result["warning"])
#         print("Confidence:", result.get("category_confidence"))
#
# def example_cleanup():
#     """Example showing model cleanup."""
#     print("\n=== Model Cleanup ===\n")
#     service = VisualProcessingService()
#     # Process some images
#     result = service.process_image("https://example.com/product.jpg")
#     print("Processed successfully")
#     # Clean up models when done (frees memory)
#     VisualProcessingService.cleanup_models()
#     print("Models cleaned up and memory freed")
# # ==================== PRODUCTION USAGE ====================
# def production_example():
#     """
#     Production-ready example with proper error handling and logging.
#     """
#     import logging
#     # Setup logging
#     logging.basicConfig(
#         level=logging.INFO,
#         format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
#     )
#     service = VisualProcessingService()
#
#     def process_product_image(image_url: str, product_id: str) -> Dict:
#         """
#         Process a product image with full error handling.
#         """
#         try:
#             # Process with hierarchical mode for best accuracy
#             result = service.process_image(
#                 image_url,
#                 detection_mode="hierarchical",
#                 apply_crop=False  # Set True if images have busy backgrounds
#             )
#             # Check for errors
#             if "error" in result:
#                 logger.error(f"Failed to process {product_id}: {result['error']}")
#                 return {
#                     "product_id": product_id,
#                     "status": "error",
#                     "error": result["error"]
#                 }
#             # Check confidence
#             confidence = result.get("detection_confidence", 0)
#             if confidence < 0.15:
#                 logger.warning(f"Low confidence for {product_id}: {confidence}")
#                 return {
#                     "product_id": product_id,
#                     "status": "low_confidence",
#                     "confidence": confidence,
#                     "partial_attributes": result["visual_attributes"]
#                 }
#             # Success
#             return {
#                 "product_id": product_id,
#                 "status": "success",
#                 "attributes": result["visual_attributes"],
#                 "confidence": confidence,
#                 "processing_time": result["processing_time"]
#             }
#         except Exception as e:
#             logger.exception(f"Unexpected error processing {product_id}")
#             return {
#                 "product_id": product_id,
#                 "status": "exception",
#                 "error": str(e)
#             }
#
#     # Process products
#     products = [
#         {"id": "PROD001", "image_url": "https://example.com/tshirt.jpg"},
#         {"id": "PROD002", "image_url": "https://example.com/laptop.jpg"},
#         {"id": "PROD003", "image_url": "https://example.com/chair.jpg"}
#     ]
#     results = []
#     for product in products:
#         result = process_product_image(product["image_url"], product["id"])
#         results.append(result)
#         # Print summary
#         if result["status"] == "success":
#             attrs = result["attributes"]
#             print(f"\n✓ {product['id']} ({result['processing_time']}s):")
#             print(f"  Type: {attrs.get('product_type')}")
#             print(f"  Category: {attrs.get('category')}")
#             print(f"  Color: {attrs.get('primary_color')}")
#         else:
#             print(f"\n✗ {product['id']}: {result['status']}")
#     return results
# # ==================== MAIN ====================
# if __name__ == "__main__":
#     # Run examples
#     print("Enhanced Visual Processing Service")
#     print("=" * 60)
#     # Show statistics
#     service = VisualProcessingService()
#     stats = service.get_statistics()
#     print(f"\nTaxonomy Coverage:")
#     print(f"  Categories: {stats['total_categories']}")
#     print(f"  Products: {stats['total_products']}")
#     print(f"  Attributes: {stats['total_unique_attributes']}")
#     print("\n" + "=" * 60)
#     print("Run individual examples by calling the example functions:")
#     print("  - example_basic_usage()")
#     print("  - example_fast_mode()")
#     print("  - example_with_cropping()")
#     print("  - example_batch_processing()")
#     print("  - example_category_info()")
#     print("  - example_detailed_predictions()")
#     print("  - example_color_distribution()")
#     print("  - production_example()")
#     print("=" * 60)
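
# ==================== QUICK SMOKE TEST (sketch) ====================
# A minimal end-to-end run of the active (non-commented) service above.
# The URL is a placeholder; substitute a real product image. The first run
# downloads the CLIP weights (a few hundred MB).
if __name__ == "__main__":
    import json

    logging.basicConfig(level=logging.INFO)
    service = VisualProcessingService()
    demo_url = "https://example.com/product.jpg"  # placeholder URL
    output = service.process_image(demo_url)
    # default=str keeps any non-JSON-native values printable
    print(json.dumps(output, indent=2, default=str))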