visual_processing_service.py 85 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157115811591160116111621163116411651166116711681169117011711172117311741175117611771178117911801181118211831184118511861187118811891190119111921193119411951196119711981199120012011202120312041205120612071208120912101211121212131214121512161217121812191220122112221223122412251226122712281229123012311232123312341235123612371238123912401241124212431244124512461247124812491250125112521253125412551256125712581259126012611262126312641265126612671268126912701271127212731274127512761277127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307130813091310131113121313131413151316131713181319132013211322132313241325132613271328132913301331133213331334133513361337133813391340134113421343134413451346134713481349135013511352135313541355135613571358135913601361136213631364136513661367136813691370137113721373137413751376137713781379138013811382138313841385138613871388138913901391139213931394139513961397139813991400140114021403140414051406140714081409141014111412141314141415141614171418141914201421142214231424142514261427142814291430143114321433143414351436143714381439144014411442144314441445144614471448144914501451145214531454145514561457145814591460146114621463146414651466146714681469147014711472147314741475147614771478147914801481148214831484148514861487148814891490149114921493149414951496149714981499150015011502150315041505150615071508150915101511151215131514151515161517151815191520152115221523152415251526152715281529153015311532153315341535153615371538153915401541154215431544154515461547154815491550155115521553155415551556155715581559156015611562156315641565156615671568156915701571157215731574157515761577157815791580158115821583158415851586158715881589159015911592159315941595159615971598159916001601160216031604160516061607160816091610161116121613161416151616161716181619162016211622162316241625162616271628162916301631163216331634163516361637163816391640164116421643164416451646164716481649165016511652165316541655165616571658165916601661166216631664166516661667166816691670167116721673167416751676167716781679168016811682168316841685168616871688168916901691169216931694169516961697169816991700170117021703170417051706170717081709171017111712171317141715171617171718171917201721172217231724172517261727172817291730173117321733173417351736173717381739174017411742174317441745174617471748174917501751175217531754175517561757175817591760176117621763176417651766176717681769177017711772177317741775177617771778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090
  1. # # ==================== visual_processing_service.py (FIXED - Dynamic Detection) ====================
  2. # import torch
  3. # import cv2
  4. # import numpy as np
  5. # import requests
  6. # from io import BytesIO
  7. # from PIL import Image
  8. # from typing import Dict, List, Optional, Tuple
  9. # import logging
  10. # from transformers import CLIPProcessor, CLIPModel
  11. # from sklearn.cluster import KMeans
  12. # logger = logging.getLogger(__name__)
  13. # class VisualProcessingService:
  14. # """Service for extracting visual attributes from product images using CLIP."""
  15. # # Class-level caching (shared across instances)
  16. # _clip_model = None
  17. # _clip_processor = None
  18. # _device = None
  19. # # Define category-specific attributes
  20. # CATEGORY_ATTRIBUTES = {
  21. # "clothing": {
  22. # "products": ["t-shirt", "shirt", "dress", "pants", "jeans", "shorts",
  23. # "skirt", "jacket", "coat", "sweater", "hoodie", "top", "blouse"],
  24. # "attributes": {
  25. # "pattern": ["solid color", "striped", "checkered", "graphic print", "floral", "geometric", "plain"],
  26. # "material": ["cotton", "polyester", "denim", "leather", "silk", "wool", "linen", "blend"],
  27. # "style": ["casual", "formal", "sporty", "streetwear", "elegant", "vintage", "bohemian"],
  28. # "fit": ["slim fit", "regular fit", "loose fit", "oversized", "tailored"],
  29. # "neckline": ["crew neck", "v-neck", "round neck", "collar", "scoop neck"],
  30. # "sleeve_type": ["short sleeve", "long sleeve", "sleeveless", "3/4 sleeve"],
  31. # "closure_type": ["button", "zipper", "pull-on", "snap", "tie"]
  32. # }
  33. # },
  34. # "tools": {
  35. # "products": ["screwdriver", "hammer", "wrench", "pliers", "drill", "saw",
  36. # "measuring tape", "level", "chisel", "file"],
  37. # "attributes": {
  38. # "material": ["steel", "aluminum", "plastic", "wood", "rubber", "chrome"],
  39. # "type": ["manual", "electric", "pneumatic", "cordless", "corded"],
  40. # "finish": ["chrome plated", "powder coated", "stainless steel", "painted"],
  41. # "handle_type": ["rubber grip", "plastic", "wooden", "cushioned", "ergonomic"]
  42. # }
  43. # },
  44. # "electronics": {
  45. # "products": ["phone", "laptop", "tablet", "headphones", "speaker", "camera",
  46. # "smartwatch", "charger", "mouse", "keyboard"],
  47. # "attributes": {
  48. # "material": ["plastic", "metal", "glass", "aluminum", "rubber"],
  49. # "style": ["modern", "minimalist", "sleek", "industrial", "vintage"],
  50. # "finish": ["matte", "glossy", "metallic", "textured"],
  51. # "connectivity": ["wireless", "wired", "bluetooth", "USB"]
  52. # }
  53. # },
  54. # "furniture": {
  55. # "products": ["chair", "table", "sofa", "bed", "desk", "shelf", "cabinet",
  56. # "dresser", "bench", "stool"],
  57. # "attributes": {
  58. # "material": ["wood", "metal", "glass", "plastic", "fabric", "leather"],
  59. # "style": ["modern", "traditional", "industrial", "rustic", "contemporary", "vintage"],
  60. # "finish": ["natural wood", "painted", "stained", "laminated", "upholstered"]
  61. # }
  62. # },
  63. # "home_decor": {
  64. # "products": ["painting", "canvas", "wall art", "frame", "vase", "lamp",
  65. # "mirror", "clock", "sculpture", "poster"],
  66. # "attributes": {
  67. # "style": ["modern", "abstract", "traditional", "contemporary", "vintage", "minimalist"],
  68. # "material": ["canvas", "wood", "metal", "glass", "ceramic", "paper"],
  69. # "finish": ["glossy", "matte", "textured", "framed", "gallery wrapped"],
  70. # "theme": ["nature", "geometric", "floral", "landscape", "portrait", "abstract"]
  71. # }
  72. # },
  73. # "kitchen": {
  74. # "products": ["pot", "pan", "knife", "utensil", "plate", "bowl", "cup",
  75. # "appliance", "cutting board", "container"],
  76. # "attributes": {
  77. # "material": ["stainless steel", "aluminum", "ceramic", "glass", "plastic", "wood"],
  78. # "finish": ["non-stick", "stainless", "enameled", "anodized"],
  79. # "type": ["manual", "electric", "dishwasher safe"]
  80. # }
  81. # }
  82. # }
  83. # def __init__(self):
  84. # pass
  85. # @classmethod
  86. # def _get_device(cls):
  87. # """Get optimal device."""
  88. # if cls._device is None:
  89. # cls._device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
  90. # logger.info(f"Visual Processing using device: {cls._device}")
  91. # return cls._device
  92. # @classmethod
  93. # def _get_clip_model(cls):
  94. # """Lazy load CLIP model with class-level caching."""
  95. # if cls._clip_model is None:
  96. # logger.info("Loading CLIP model (this may take a few minutes on first use)...")
  97. # cls._clip_model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32")
  98. # cls._clip_processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")
  99. # device = cls._get_device()
  100. # cls._clip_model.to(device)
  101. # cls._clip_model.eval()
  102. # logger.info("✓ CLIP model loaded successfully")
  103. # return cls._clip_model, cls._clip_processor
  104. # def download_image(self, image_url: str) -> Optional[Image.Image]:
  105. # """Download image from URL."""
  106. # try:
  107. # response = requests.get(image_url, timeout=10)
  108. # response.raise_for_status()
  109. # image = Image.open(BytesIO(response.content)).convert('RGB')
  110. # return image
  111. # except Exception as e:
  112. # logger.error(f"Error downloading image from {image_url}: {str(e)}")
  113. # return None
  114. # def extract_dominant_colors(self, image: Image.Image, n_colors: int = 3) -> List[Dict]:
  115. # """Extract dominant colors using K-means."""
  116. # try:
  117. # # Resize for faster processing
  118. # img_small = image.resize((150, 150))
  119. # img_array = np.array(img_small)
  120. # pixels = img_array.reshape(-1, 3)
  121. # # K-means clustering
  122. # kmeans = KMeans(n_clusters=n_colors, random_state=42, n_init=5)
  123. # kmeans.fit(pixels)
  124. # colors = []
  125. # labels_counts = np.bincount(kmeans.labels_)
  126. # for i, center in enumerate(kmeans.cluster_centers_):
  127. # rgb = tuple(center.astype(int))
  128. # color_name = self._get_color_name_simple(rgb)
  129. # percentage = float(labels_counts[i] / len(kmeans.labels_) * 100)
  130. # colors.append({
  131. # "name": color_name,
  132. # "rgb": rgb,
  133. # "percentage": percentage
  134. # })
  135. # colors.sort(key=lambda x: x['percentage'], reverse=True)
  136. # return colors
  137. # except Exception as e:
  138. # logger.error(f"Error extracting colors: {str(e)}")
  139. # return []
  140. # def _get_color_name_simple(self, rgb: Tuple[int, int, int]) -> str:
  141. # """
  142. # Simple color name detection without webcolors dependency.
  143. # Maps RGB to basic color names.
  144. # """
  145. # r, g, b = rgb
  146. # # Define basic color ranges
  147. # colors = {
  148. # 'black': (r < 50 and g < 50 and b < 50),
  149. # 'white': (r > 200 and g > 200 and b > 200),
  150. # 'gray': (abs(r - g) < 30 and abs(g - b) < 30 and abs(r - b) < 30 and 50 <= r <= 200),
  151. # 'red': (r > 150 and g < 100 and b < 100),
  152. # 'green': (g > 150 and r < 100 and b < 100),
  153. # 'blue': (b > 150 and r < 100 and g < 100),
  154. # 'yellow': (r > 200 and g > 200 and b < 100),
  155. # 'orange': (r > 200 and 100 < g < 200 and b < 100),
  156. # 'purple': (r > 100 and b > 100 and g < 100),
  157. # 'pink': (r > 200 and 100 < g < 200 and 100 < b < 200),
  158. # 'brown': (50 < r < 150 and 30 < g < 100 and b < 80),
  159. # 'cyan': (r < 100 and g > 150 and b > 150),
  160. # }
  161. # for color_name, condition in colors.items():
  162. # if condition:
  163. # return color_name
  164. # # Default fallback
  165. # if r > g and r > b:
  166. # return 'red'
  167. # elif g > r and g > b:
  168. # return 'green'
  169. # elif b > r and b > g:
  170. # return 'blue'
  171. # else:
  172. # return 'gray'
  173. # def classify_with_clip(
  174. # self,
  175. # image: Image.Image,
  176. # candidates: List[str],
  177. # attribute_name: str,
  178. # confidence_threshold: float = 0.15
  179. # ) -> Dict:
  180. # """Use CLIP to classify image against candidate labels."""
  181. # try:
  182. # model, processor = self._get_clip_model()
  183. # device = self._get_device()
  184. # # Prepare inputs
  185. # inputs = processor(
  186. # text=candidates,
  187. # images=image,
  188. # return_tensors="pt",
  189. # padding=True
  190. # )
  191. # # Move to device
  192. # inputs = {k: v.to(device) for k, v in inputs.items()}
  193. # # Get predictions
  194. # with torch.no_grad():
  195. # outputs = model(**inputs)
  196. # logits_per_image = outputs.logits_per_image
  197. # probs = logits_per_image.softmax(dim=1).cpu()
  198. # # Get top predictions
  199. # top_k = min(3, len(candidates))
  200. # top_probs, top_indices = torch.topk(probs[0], k=top_k)
  201. # results = []
  202. # for prob, idx in zip(top_probs, top_indices):
  203. # if prob.item() > confidence_threshold:
  204. # results.append({
  205. # "value": candidates[idx.item()],
  206. # "confidence": float(prob.item())
  207. # })
  208. # return {
  209. # "attribute": attribute_name,
  210. # "predictions": results
  211. # }
  212. # except Exception as e:
  213. # logger.error(f"Error in CLIP classification for {attribute_name}: {str(e)}")
  214. # return {"attribute": attribute_name, "predictions": []}
  215. # def detect_product_category(self, image: Image.Image) -> Tuple[str, float]:
  216. # """
  217. # First detect which category the product belongs to.
  218. # Returns: (category_name, confidence)
  219. # """
  220. # # Get all product types from all categories
  221. # all_categories = []
  222. # category_map = {}
  223. # for category, data in self.CATEGORY_ATTRIBUTES.items():
  224. # for product in data["products"]:
  225. # all_categories.append(f"a photo of a {product}")
  226. # category_map[f"a photo of a {product}"] = category
  227. # # Classify
  228. # result = self.classify_with_clip(image, all_categories, "category_detection", confidence_threshold=0.10)
  229. # if result["predictions"]:
  230. # best_match = result["predictions"][0]
  231. # detected_category = category_map[best_match["value"]]
  232. # product_type = best_match["value"].replace("a photo of a ", "")
  233. # confidence = best_match["confidence"]
  234. # logger.info(f"Detected category: {detected_category}, product: {product_type}, confidence: {confidence:.3f}")
  235. # return detected_category, product_type, confidence
  236. # return "unknown", "unknown", 0.0
  237. # def process_image(
  238. # self,
  239. # image_url: str,
  240. # product_type_hint: Optional[str] = None
  241. # ) -> Dict:
  242. # """
  243. # Main method to process image and extract visual attributes.
  244. # Now dynamically detects product category first.
  245. # """
  246. # import time
  247. # start_time = time.time()
  248. # try:
  249. # # Download image
  250. # image = self.download_image(image_url)
  251. # if image is None:
  252. # return {
  253. # "visual_attributes": {},
  254. # "error": "Failed to download image"
  255. # }
  256. # visual_attributes = {}
  257. # detailed_predictions = {}
  258. # # Step 1: Detect product category
  259. # detected_category, detected_product_type, category_confidence = self.detect_product_category(image)
  260. # # If confidence is too low, return minimal info
  261. # if category_confidence < 0.10:
  262. # logger.warning(f"Low confidence in category detection ({category_confidence:.3f}). Returning basic attributes only.")
  263. # colors = self.extract_dominant_colors(image, n_colors=3)
  264. # if colors:
  265. # visual_attributes["primary_color"] = colors[0]["name"]
  266. # visual_attributes["color_palette"] = [c["name"] for c in colors]
  267. # return {
  268. # "visual_attributes": visual_attributes,
  269. # "category_confidence": category_confidence,
  270. # "processing_time": round(time.time() - start_time, 2)
  271. # }
  272. # # Add detected product type
  273. # visual_attributes["product_type"] = detected_product_type
  274. # visual_attributes["category"] = detected_category
  275. # # Step 2: Extract color (universal attribute)
  276. # colors = self.extract_dominant_colors(image, n_colors=3)
  277. # if colors:
  278. # visual_attributes["primary_color"] = colors[0]["name"]
  279. # visual_attributes["color_palette"] = [c["name"] for c in colors]
  280. # # Step 3: Extract category-specific attributes
  281. # if detected_category in self.CATEGORY_ATTRIBUTES:
  282. # category_config = self.CATEGORY_ATTRIBUTES[detected_category]
  283. # for attr_name, attr_values in category_config["attributes"].items():
  284. # # Use higher confidence threshold for category-specific attributes
  285. # result = self.classify_with_clip(image, attr_values, attr_name, confidence_threshold=0.20)
  286. # if result["predictions"]:
  287. # # Only add if confidence is reasonable
  288. # best_prediction = result["predictions"][0]
  289. # if best_prediction["confidence"] > 0.20:
  290. # visual_attributes[attr_name] = best_prediction["value"]
  291. # detailed_predictions[attr_name] = result
  292. # processing_time = time.time() - start_time
  293. # return {
  294. # "visual_attributes": visual_attributes,
  295. # "detailed_predictions": detailed_predictions,
  296. # "category_confidence": category_confidence,
  297. # "processing_time": round(processing_time, 2)
  298. # }
  299. # except Exception as e:
  300. # logger.error(f"Error processing image: {str(e)}")
  301. # return {
  302. # "visual_attributes": {},
  303. # "error": str(e),
  304. # "processing_time": round(time.time() - start_time, 2)
  305. # }
  306. # ==================== visual_processing_service.py (FIXED - Smart Subcategory Detection) ====================
  307. import torch
  308. import numpy as np
  309. import requests
  310. from io import BytesIO
  311. from PIL import Image
  312. from typing import Dict, List, Optional, Tuple
  313. import logging
  314. from transformers import CLIPProcessor, CLIPModel
  315. from sklearn.cluster import KMeans
  316. logger = logging.getLogger(__name__)
  317. import os
  318. os.environ['TOKENIZERS_PARALLELISM'] = 'false' # Disable tokenizer warnings
  319. import warnings
  320. warnings.filterwarnings('ignore') # Suppress all warnings
  321. class VisualProcessingService:
  322. """Service for extracting visual attributes from product images using CLIP with smart subcategory detection."""
  323. # Class-level caching (shared across instances)
  324. _clip_model = None
  325. _clip_processor = None
  326. _device = None
  327. # Define hierarchical category structure with subcategories
  328. CATEGORY_ATTRIBUTES = {
  329. "clothing": {
  330. "subcategories": {
  331. "tops": {
  332. "products": ["t-shirt", "shirt", "blouse", "top", "sweater", "hoodie", "tank top", "polo shirt"],
  333. "attributes": {
  334. "pattern": ["solid color", "striped", "checkered", "graphic print", "floral", "geometric", "plain", "logo print"],
  335. "material": ["cotton", "polyester", "silk", "wool", "linen", "blend", "knit"],
  336. "style": ["casual", "formal", "sporty", "streetwear", "elegant", "vintage", "minimalist"],
  337. "fit": ["slim fit", "regular fit", "loose fit", "oversized", "fitted"],
  338. "neckline": ["crew neck", "v-neck", "round neck", "collar", "scoop neck", "henley"],
  339. "sleeve_type": ["short sleeve", "long sleeve", "sleeveless", "3/4 sleeve", "cap sleeve"],
  340. "closure_type": ["button-up", "zipper", "pull-on", "snap button"]
  341. }
  342. },
  343. "bottoms": {
  344. "products": ["jeans", "pants", "trousers", "shorts", "chinos", "cargo pants", "leggings"],
  345. "attributes": {
  346. "pattern": ["solid color", "distressed", "faded", "plain", "washed", "dark wash", "light wash"],
  347. "material": ["denim", "cotton", "polyester", "wool", "blend", "twill", "corduroy"],
  348. "style": ["casual", "formal", "sporty", "vintage", "modern", "workwear"],
  349. "fit": ["slim fit", "regular fit", "loose fit", "skinny", "bootcut", "straight leg", "relaxed fit"],
  350. "rise": ["high rise", "mid rise", "low rise"],
  351. "closure_type": ["button fly", "zipper fly", "elastic waist", "drawstring"],
  352. "length": ["full length", "cropped", "ankle length", "capri"]
  353. }
  354. },
  355. "dresses_skirts": {
  356. "products": ["dress", "skirt", "gown", "sundress", "maxi dress", "mini skirt"],
  357. "attributes": {
  358. "pattern": ["solid color", "floral", "striped", "geometric", "plain", "printed", "polka dot"],
  359. "material": ["cotton", "silk", "polyester", "linen", "blend", "chiffon", "satin"],
  360. "style": ["casual", "formal", "cocktail", "bohemian", "vintage", "elegant", "party"],
  361. "fit": ["fitted", "loose", "a-line", "bodycon", "flowy", "wrap"],
  362. "neckline": ["crew neck", "v-neck", "scoop neck", "halter", "off-shoulder", "sweetheart"],
  363. "sleeve_type": ["short sleeve", "long sleeve", "sleeveless", "3/4 sleeve", "flutter sleeve"],
  364. "length": ["mini", "midi", "maxi", "knee-length", "floor-length"]
  365. }
  366. },
  367. "outerwear": {
  368. "products": ["jacket", "coat", "blazer", "windbreaker", "parka", "bomber jacket", "denim jacket"],
  369. "attributes": {
  370. "pattern": ["solid color", "plain", "quilted", "textured"],
  371. "material": ["leather", "denim", "wool", "polyester", "cotton", "nylon", "fleece"],
  372. "style": ["casual", "formal", "sporty", "vintage", "military", "biker"],
  373. "fit": ["slim fit", "regular fit", "oversized", "cropped"],
  374. "closure_type": ["zipper", "button", "snap button", "toggle"],
  375. "length": ["cropped", "hip length", "thigh length", "knee length"]
  376. }
  377. }
  378. }
  379. },
  380. "footwear": {
  381. "products": ["sneakers", "boots", "sandals", "heels", "loafers", "flats", "slippers"],
  382. "attributes": {
  383. "material": ["leather", "canvas", "suede", "synthetic", "rubber", "mesh"],
  384. "style": ["casual", "formal", "athletic", "vintage", "modern"],
  385. "closure_type": ["lace-up", "slip-on", "velcro", "buckle", "zipper"],
  386. "toe_style": ["round toe", "pointed toe", "square toe", "open toe", "closed toe"]
  387. }
  388. },
  389. "tools": {
  390. "products": ["screwdriver", "hammer", "wrench", "pliers", "drill", "saw", "measuring tape"],
  391. "attributes": {
  392. "material": ["steel", "aluminum", "plastic", "rubber", "chrome", "iron"],
  393. "type": ["manual", "electric", "pneumatic", "cordless", "corded"],
  394. "finish": ["chrome plated", "powder coated", "stainless steel", "painted"],
  395. "handle_type": ["rubber grip", "plastic", "wooden", "ergonomic", "cushioned"]
  396. }
  397. },
  398. "electronics": {
  399. "products": ["phone", "laptop", "tablet", "headphones", "speaker", "camera", "smartwatch", "earbuds"],
  400. "attributes": {
  401. "material": ["plastic", "metal", "glass", "aluminum", "rubber", "silicone"],
  402. "style": ["modern", "minimalist", "sleek", "industrial", "vintage"],
  403. "finish": ["matte", "glossy", "metallic", "textured", "transparent"],
  404. "connectivity": ["wireless", "wired", "bluetooth", "USB-C", "USB"]
  405. }
  406. },
  407. "furniture": {
  408. "products": ["chair", "table", "sofa", "bed", "desk", "shelf", "cabinet", "bench"],
  409. "attributes": {
  410. "material": ["wood", "metal", "glass", "plastic", "fabric", "leather", "rattan"],
  411. "style": ["modern", "traditional", "industrial", "rustic", "contemporary", "vintage", "scandinavian"],
  412. "finish": ["natural wood", "painted", "stained", "laminated", "upholstered", "polished"]
  413. }
  414. }
  415. }
  416. def __init__(self):
  417. pass
  418. @classmethod
  419. def _get_device(cls):
  420. """Get optimal device."""
  421. if cls._device is None:
  422. cls._device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
  423. logger.info(f"Visual Processing using device: {cls._device}")
  424. return cls._device
  425. @classmethod
  426. def _get_clip_model(cls):
  427. """Lazy load CLIP model with class-level caching."""
  428. if cls._clip_model is None:
  429. logger.info("Loading CLIP model (this may take a few minutes on first use)...")
  430. cls._clip_model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32")
  431. cls._clip_processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")
  432. device = cls._get_device()
  433. cls._clip_model.to(device)
  434. cls._clip_model.eval()
  435. logger.info("✓ CLIP model loaded successfully")
  436. return cls._clip_model, cls._clip_processor
  437. def download_image(self, image_url: str) -> Optional[Image.Image]:
  438. """Download image from URL."""
  439. try:
  440. response = requests.get(image_url, timeout=10)
  441. response.raise_for_status()
  442. image = Image.open(BytesIO(response.content)).convert('RGB')
  443. return image
  444. except Exception as e:
  445. logger.error(f"Error downloading image from {image_url}: {str(e)}")
  446. return None
  447. def extract_dominant_colors(self, image: Image.Image, n_colors: int = 3) -> List[Dict]:
  448. """Extract dominant colors using K-means clustering."""
  449. try:
  450. # Resize for faster processing
  451. img_small = image.resize((150, 150))
  452. img_array = np.array(img_small)
  453. pixels = img_array.reshape(-1, 3)
  454. # K-means clustering
  455. kmeans = KMeans(n_clusters=n_colors, random_state=42, n_init=5)
  456. kmeans.fit(pixels)
  457. colors = []
  458. labels_counts = np.bincount(kmeans.labels_)
  459. for i, center in enumerate(kmeans.cluster_centers_):
  460. rgb = tuple(center.astype(int))
  461. color_name = self._get_color_name_simple(rgb)
  462. percentage = float(labels_counts[i] / len(kmeans.labels_) * 100)
  463. colors.append({
  464. "name": color_name,
  465. "rgb": rgb,
  466. "percentage": round(percentage, 2)
  467. })
  468. # Sort by percentage (most dominant first)
  469. colors.sort(key=lambda x: x['percentage'], reverse=True)
  470. return colors
  471. except Exception as e:
  472. logger.error(f"Error extracting colors: {str(e)}")
  473. return []
  474. def _get_color_name_simple(self, rgb: Tuple[int, int, int]) -> str:
  475. """Map RGB values to basic color names."""
  476. r, g, b = rgb
  477. # Define color ranges with priorities
  478. colors = {
  479. 'black': (r < 50 and g < 50 and b < 50),
  480. 'white': (r > 200 and g > 200 and b > 200),
  481. 'gray': (abs(r - g) < 30 and abs(g - b) < 30 and abs(r - b) < 30 and 50 <= r <= 200),
  482. 'red': (r > 150 and g < 100 and b < 100),
  483. 'green': (g > 150 and r < 100 and b < 100),
  484. 'blue': (b > 150 and r < 100 and g < 100),
  485. 'yellow': (r > 200 and g > 200 and b < 100),
  486. 'orange': (r > 200 and 100 < g < 200 and b < 100),
  487. 'purple': (r > 100 and b > 100 and g < 100),
  488. 'pink': (r > 200 and 100 < g < 200 and 100 < b < 200),
  489. 'brown': (50 < r < 150 and 30 < g < 100 and b < 80),
  490. 'cyan': (r < 100 and g > 150 and b > 150),
  491. 'beige': (180 < r < 240 and 160 < g < 220 and 120 < b < 180),
  492. }
  493. for color_name, condition in colors.items():
  494. if condition:
  495. return color_name
  496. # Fallback to dominant channel
  497. if r > g and r > b:
  498. return 'red'
  499. elif g > r and g > b:
  500. return 'green'
  501. elif b > r and b > g:
  502. return 'blue'
  503. else:
  504. return 'gray'
  505. # def classify_with_clip(
  506. # self,
  507. # image: Image.Image,
  508. # candidates: List[str],
  509. # attribute_name: str,
  510. # confidence_threshold: float = 0.15
  511. # ) -> Dict:
  512. # """Use CLIP to classify image against candidate labels."""
  513. # try:
  514. # model, processor = self._get_clip_model()
  515. # device = self._get_device()
  516. # # Prepare inputs
  517. # inputs = processor(
  518. # text=candidates,
  519. # images=image,
  520. # return_tensors="pt",
  521. # padding=True
  522. # )
  523. # # Move to device
  524. # inputs = {k: v.to(device) for k, v in inputs.items()}
  525. # # Get predictions
  526. # with torch.no_grad():
  527. # outputs = model(**inputs)
  528. # logits_per_image = outputs.logits_per_image
  529. # probs = logits_per_image.softmax(dim=1).cpu()
  530. # # Get top predictions
  531. # top_k = min(3, len(candidates))
  532. # top_probs, top_indices = torch.topk(probs[0], k=top_k)
  533. # results = []
  534. # for prob, idx in zip(top_probs, top_indices):
  535. # if prob.item() > confidence_threshold:
  536. # results.append({
  537. # "value": candidates[idx.item()],
  538. # "confidence": round(float(prob.item()), 3)
  539. # })
  540. # return {
  541. # "attribute": attribute_name,
  542. # "predictions": results
  543. # }
  544. # except Exception as e:
  545. # logger.error(f"Error in CLIP classification for {attribute_name}: {str(e)}")
  546. # return {"attribute": attribute_name, "predictions": []}
  547. def classify_with_clip(
  548. self,
  549. image: Image.Image,
  550. candidates: List[str],
  551. attribute_name: str,
  552. confidence_threshold: float = 0.15
  553. ) -> Dict:
  554. """Use CLIP to classify image against candidate labels."""
  555. try:
  556. model, processor = self._get_clip_model()
  557. device = self._get_device()
  558. # ⚡ OPTIMIZATION: Process in smaller batches to avoid memory issues
  559. batch_size = 16 # Process 16 candidates at a time
  560. all_results = []
  561. for i in range(0, len(candidates), batch_size):
  562. batch_candidates = candidates[i:i + batch_size]
  563. # Prepare inputs WITHOUT progress bars
  564. inputs = processor(
  565. text=batch_candidates,
  566. images=image,
  567. return_tensors="pt",
  568. padding=True
  569. )
  570. # Move to device
  571. inputs = {k: v.to(device) for k, v in inputs.items()}
  572. # Get predictions
  573. with torch.no_grad():
  574. outputs = model(**inputs)
  575. logits_per_image = outputs.logits_per_image
  576. probs = logits_per_image.softmax(dim=1).cpu()
  577. # Collect results from this batch
  578. for j, prob in enumerate(probs[0]):
  579. if prob.item() > confidence_threshold:
  580. all_results.append({
  581. "value": batch_candidates[j],
  582. "confidence": round(float(prob.item()), 3)
  583. })
  584. # Sort by confidence and return top 3
  585. all_results.sort(key=lambda x: x['confidence'], reverse=True)
  586. return {
  587. "attribute": attribute_name,
  588. "predictions": all_results[:3]
  589. }
  590. except Exception as e:
  591. logger.error(f"Error in CLIP classification for {attribute_name}: {str(e)}")
  592. return {"attribute": attribute_name, "predictions": []}
  593. def detect_category_and_subcategory(self, image: Image.Image) -> Tuple[str, str, str, float]:
  594. """
  595. Hierarchically detect category, subcategory, and specific product.
  596. Returns: (category, subcategory, product_type, confidence)
  597. """
  598. # Step 1: Detect if it's clothing or something else
  599. main_categories = list(self.CATEGORY_ATTRIBUTES.keys())
  600. category_prompts = [f"a photo of {cat}" for cat in main_categories]
  601. result = self.classify_with_clip(image, category_prompts, "main_category", confidence_threshold=0.10)
  602. if not result["predictions"]:
  603. return "unknown", "unknown", "unknown", 0.0
  604. detected_category = result["predictions"][0]["value"].replace("a photo of ", "")
  605. category_confidence = result["predictions"][0]["confidence"]
  606. logger.info(f"Step 1 - Main category detected: {detected_category} (confidence: {category_confidence:.3f})")
  607. # Step 2: For clothing, detect subcategory (tops/bottoms/dresses/outerwear)
  608. if detected_category == "clothing":
  609. subcategories = self.CATEGORY_ATTRIBUTES["clothing"]["subcategories"]
  610. # Collect all products grouped by subcategory
  611. all_products = []
  612. product_to_subcategory = {}
  613. for subcat, subcat_data in subcategories.items():
  614. for product in subcat_data["products"]:
  615. prompt = f"a photo of {product}"
  616. all_products.append(prompt)
  617. product_to_subcategory[prompt] = subcat
  618. # Step 3: Detect specific product type
  619. product_result = self.classify_with_clip(
  620. image,
  621. all_products,
  622. "product_type",
  623. confidence_threshold=0.12
  624. )
  625. if product_result["predictions"]:
  626. best_match = product_result["predictions"][0]
  627. product_prompt = best_match["value"]
  628. product_type = product_prompt.replace("a photo of ", "")
  629. subcategory = product_to_subcategory[product_prompt]
  630. product_confidence = best_match["confidence"]
  631. logger.info(f"Step 2 - Detected: {subcategory} > {product_type} (confidence: {product_confidence:.3f})")
  632. return detected_category, subcategory, product_type, product_confidence
  633. else:
  634. logger.warning("Could not detect specific product type for clothing")
  635. return detected_category, "unknown", "unknown", category_confidence
  636. # Step 3: For non-clothing categories, just detect product type
  637. else:
  638. category_data = self.CATEGORY_ATTRIBUTES[detected_category]
  639. # Check if this category has subcategories or direct products
  640. if "products" in category_data:
  641. products = category_data["products"]
  642. product_prompts = [f"a photo of {p}" for p in products]
  643. product_result = self.classify_with_clip(
  644. image,
  645. product_prompts,
  646. "product_type",
  647. confidence_threshold=0.12
  648. )
  649. if product_result["predictions"]:
  650. best_match = product_result["predictions"][0]
  651. product_type = best_match["value"].replace("a photo of ", "")
  652. logger.info(f"Step 2 - Detected: {detected_category} > {product_type}")
  653. return detected_category, "none", product_type, best_match["confidence"]
  654. return detected_category, "unknown", "unknown", category_confidence
  655. def process_image(
  656. self,
  657. image_url: str,
  658. product_type_hint: Optional[str] = None
  659. ) -> Dict:
  660. """
  661. Main method to process image and extract visual attributes.
  662. Uses hierarchical detection to extract only relevant attributes.
  663. """
  664. import time
  665. start_time = time.time()
  666. try:
  667. # Download image
  668. image = self.download_image(image_url)
  669. if image is None:
  670. return {
  671. "visual_attributes": {},
  672. "error": "Failed to download image"
  673. }
  674. visual_attributes = {}
  675. detailed_predictions = {}
  676. # Step 1: Detect category, subcategory, and product type
  677. category, subcategory, product_type, confidence = self.detect_category_and_subcategory(image)
  678. # Low confidence check
  679. if confidence < 0.10:
  680. logger.warning(f"Low confidence in detection ({confidence:.3f}). Returning basic attributes only.")
  681. colors = self.extract_dominant_colors(image, n_colors=3)
  682. if colors:
  683. visual_attributes["primary_color"] = colors[0]["name"]
  684. visual_attributes["color_palette"] = [c["name"] for c in colors]
  685. return {
  686. "visual_attributes": visual_attributes,
  687. "detection_confidence": confidence,
  688. "warning": "Low confidence detection",
  689. "processing_time": round(time.time() - start_time, 2)
  690. }
  691. # Add detected metadata
  692. visual_attributes["product_type"] = product_type
  693. visual_attributes["category"] = category
  694. if subcategory != "none" and subcategory != "unknown":
  695. visual_attributes["subcategory"] = subcategory
  696. # Step 2: Extract color information (universal)
  697. colors = self.extract_dominant_colors(image, n_colors=3)
  698. if colors:
  699. visual_attributes["primary_color"] = colors[0]["name"]
  700. visual_attributes["color_palette"] = [c["name"] for c in colors[:3]]
  701. visual_attributes["color_distribution"] = [
  702. {"color": c["name"], "percentage": c["percentage"]}
  703. for c in colors
  704. ]
  705. # Step 3: Get the right attribute configuration based on subcategory
  706. attributes_config = None
  707. if category == "clothing":
  708. if subcategory in self.CATEGORY_ATTRIBUTES["clothing"]["subcategories"]:
  709. attributes_config = self.CATEGORY_ATTRIBUTES["clothing"]["subcategories"][subcategory]["attributes"]
  710. logger.info(f"Using attributes for subcategory: {subcategory}")
  711. else:
  712. logger.warning(f"Unknown subcategory: {subcategory}. Skipping attribute extraction.")
  713. elif category in self.CATEGORY_ATTRIBUTES:
  714. if "attributes" in self.CATEGORY_ATTRIBUTES[category]:
  715. attributes_config = self.CATEGORY_ATTRIBUTES[category]["attributes"]
  716. logger.info(f"Using attributes for category: {category}")
  717. # Step 4: Extract category-specific attributes
  718. if attributes_config:
  719. for attr_name, attr_values in attributes_config.items():
  720. result = self.classify_with_clip(
  721. image,
  722. attr_values,
  723. attr_name,
  724. confidence_threshold=0.20
  725. )
  726. if result["predictions"]:
  727. best_prediction = result["predictions"][0]
  728. # Only add attributes with reasonable confidence
  729. if best_prediction["confidence"] > 0.20:
  730. visual_attributes[attr_name] = best_prediction["value"]
  731. # Store detailed predictions for debugging
  732. detailed_predictions[attr_name] = result
  733. processing_time = time.time() - start_time
  734. logger.info(f"✓ Processing complete in {processing_time:.2f}s. Extracted {len(visual_attributes)} attributes.")
  735. return {
  736. "visual_attributes": visual_attributes,
  737. "detailed_predictions": detailed_predictions,
  738. "detection_confidence": confidence,
  739. "processing_time": round(processing_time, 2)
  740. }
  741. except Exception as e:
  742. logger.error(f"Error processing image: {str(e)}")
  743. return {
  744. "visual_attributes": {},
  745. "error": str(e),
  746. "processing_time": round(time.time() - start_time, 2)
  747. }
  748. # # ==================== visual_processing_service_enhanced.py ====================
  749. # """
  750. # Enhanced Visual Processing Service combining CLIP's speed with BLIP-2's comprehensive taxonomy.
  751. # Features:
  752. # - Fast CLIP-based classification
  753. # - 70+ product categories across multiple domains
  754. # - Two-stage classification with validation
  755. # - Enhanced color normalization
  756. # - Category-specific attribute detection
  757. # - Confidence-based fallback mechanisms
  758. # - Optional center cropping for better focus
  759. # Usage:
  760. # service = VisualProcessingService()
  761. # result = service.process_image("https://example.com/product.jpg")
  762. # """
  763. # import torch
  764. # import cv2
  765. # import numpy as np
  766. # import requests
  767. # from io import BytesIO
  768. # from PIL import Image
  769. # from typing import Dict, List, Optional, Tuple
  770. # import logging
  771. # from transformers import CLIPProcessor, CLIPModel
  772. # from sklearn.cluster import KMeans
  773. # logger = logging.getLogger(__name__)
  774. # class VisualProcessingService:
  775. # """Enhanced service for extracting visual attributes from product images using CLIP."""
  776. # # Class-level caching (shared across instances)
  777. # _clip_model = None
  778. # _clip_processor = None
  779. # _device = None
  780. # # ==================== EXPANDED TAXONOMY ====================
  781. # # Base color vocabulary
  782. # COLORS = ["black", "white", "red", "blue", "green", "yellow", "gray",
  783. # "brown", "pink", "purple", "orange", "beige", "navy", "teal"]
  784. # # Pattern vocabulary
  785. # PATTERNS = ["solid", "striped", "checked", "plaid", "floral", "graphic",
  786. # "polka dot", "camo", "tie-dye", "abstract", "geometric"]
  787. # # Material vocabulary (extended)
  788. # MATERIALS = ["cotton", "polyester", "denim", "leather", "wool", "canvas",
  789. # "silicone", "metal", "fabric", "rubber", "plastic", "wood",
  790. # "glass", "ceramic", "steel", "foam", "aluminum", "carbon fiber"]
  791. # # Style vocabulary
  792. # STYLES = ["casual", "formal", "sporty", "streetwear", "elegant", "vintage",
  793. # "modern", "bohemian", "minimalist", "industrial", "rustic", "contemporary"]
  794. # # Fit vocabulary
  795. # FITS = ["slim fit", "regular fit", "loose fit", "oversized", "tailored",
  796. # "relaxed", "athletic fit"]
  797. # # Brand vocabulary (common marketplace brands)
  798. # BRANDS = ["nike", "adidas", "sony", "samsung", "apple", "generic", "lego",
  799. # "hasbro", "lg", "panasonic", "microsoft"]
  800. # # Age group vocabulary
  801. # AGE_GROUPS = ["baby", "toddler", "child", "teen", "adult", "all ages"]
  802. # # Comprehensive category-specific attributes
  803. # CATEGORY_ATTRIBUTES = {
  804. # # ==================== CLOTHING ====================
  805. # "clothing": {
  806. # "products": ["t-shirt", "shirt", "dress", "pants", "jeans", "shorts",
  807. # "skirt", "jacket", "coat", "sweater", "hoodie", "top",
  808. # "blouse", "cardigan", "blazer"],
  809. # "attributes": {
  810. # "color": COLORS,
  811. # "pattern": PATTERNS,
  812. # "material": ["cotton", "polyester", "denim", "leather", "silk",
  813. # "wool", "linen", "blend", "canvas"],
  814. # "style": STYLES,
  815. # "fit": FITS,
  816. # "neckline": ["crew neck", "v-neck", "round neck", "collar",
  817. # "scoop neck", "boat neck", "turtleneck"],
  818. # "sleeve_type": ["short sleeve", "long sleeve", "sleeveless",
  819. # "3/4 sleeve", "cap sleeve"],
  820. # "closure_type": ["button", "zipper", "pull-on", "snap", "tie", "buckle"]
  821. # }
  822. # },
  823. # # ==================== FOOTWEAR ====================
  824. # "footwear": {
  825. # "products": ["shoes", "sneakers", "sandals", "boots", "slippers",
  826. # "heels", "loafers"],
  827. # "attributes": {
  828. # "color": COLORS,
  829. # "material": ["leather", "synthetic", "canvas", "rubber", "suede", "fabric"],
  830. # "type": ["sneakers", "sandals", "formal", "boots", "sports", "casual"],
  831. # "style": STYLES,
  832. # "closure_type": ["lace-up", "slip-on", "velcro", "zipper", "buckle"]
  833. # }
  834. # },
  835. # # ==================== ACCESSORIES ====================
  836. # "accessories": {
  837. # "products": ["watch", "bag", "backpack", "handbag", "wallet", "belt",
  838. # "sunglasses", "hat", "scarf"],
  839. # "attributes": {
  840. # "color": COLORS,
  841. # "material": ["leather", "fabric", "metal", "plastic", "canvas", "synthetic"],
  842. # "style": STYLES,
  843. # "type": ["backpack", "tote", "crossbody", "messenger", "duffel"]
  844. # }
  845. # },
  846. # # ==================== JEWELRY ====================
  847. # "jewelry": {
  848. # "products": ["necklace", "ring", "bracelet", "earrings", "pendant", "chain"],
  849. # "attributes": {
  850. # "material": ["gold", "silver", "platinum", "stainless steel",
  851. # "plastic", "beads", "leather"],
  852. # "style": ["modern", "vintage", "minimalist", "statement", "elegant"],
  853. # "type": ["chain", "band", "solitaire", "hoop", "stud"]
  854. # }
  855. # },
  856. # # ==================== ELECTRONICS ====================
  857. # "electronics": {
  858. # "products": ["phone", "smartphone", "tablet", "laptop", "headphones",
  859. # "camera", "tv", "monitor", "keyboard", "mouse", "speaker",
  860. # "smartwatch", "charger"],
  861. # "attributes": {
  862. # "color": COLORS,
  863. # "material": ["plastic", "metal", "glass", "aluminum", "rubber"],
  864. # "style": ["modern", "minimalist", "sleek", "industrial"],
  865. # "finish": ["matte", "glossy", "metallic", "textured"],
  866. # "type": ["over-ear", "in-ear", "on-ear", "wireless", "wired"],
  867. # "brand": BRANDS
  868. # }
  869. # },
  870. # # ==================== FURNITURE ====================
  871. # "furniture": {
  872. # "products": ["chair", "table", "sofa", "bed", "desk", "shelf",
  873. # "cabinet", "dresser", "bench", "stool", "bookshelf"],
  874. # "attributes": {
  875. # "color": COLORS,
  876. # "material": ["wood", "metal", "glass", "plastic", "fabric", "leather"],
  877. # "style": ["modern", "traditional", "industrial", "rustic",
  878. # "contemporary", "vintage", "minimalist"],
  879. # "finish": ["natural wood", "painted", "stained", "laminated", "upholstered"]
  880. # }
  881. # },
  882. # # ==================== HOME DECOR ====================
  883. # "home_decor": {
  884. # "products": ["painting", "canvas", "wall art", "frame", "vase", "lamp",
  885. # "mirror", "clock", "sculpture", "poster", "cushion", "rug"],
  886. # "attributes": {
  887. # "color": COLORS,
  888. # "style": ["modern", "abstract", "traditional", "contemporary",
  889. # "vintage", "minimalist", "bohemian"],
  890. # "material": ["canvas", "wood", "metal", "glass", "ceramic", "paper", "fabric"],
  891. # "finish": ["glossy", "matte", "textured", "framed"],
  892. # "theme": ["nature", "geometric", "floral", "landscape", "abstract"]
  893. # }
  894. # },
  895. # # ==================== KITCHEN ====================
  896. # "kitchen": {
  897. # "products": ["pot", "pan", "knife", "utensil", "plate", "bowl", "cup",
  898. # "mug", "bottle", "container", "cutting board"],
  899. # "attributes": {
  900. # "material": ["stainless steel", "aluminum", "ceramic", "glass",
  901. # "plastic", "wood", "silicone"],
  902. # "finish": ["non-stick", "stainless", "enameled", "anodized"],
  903. # "type": ["frypan", "saucepan", "chef knife", "utility", "mixing"]
  904. # }
  905. # },
  906. # # ==================== APPLIANCES ====================
  907. # "appliances": {
  908. # "products": ["microwave", "blender", "vacuum", "fan", "toaster",
  909. # "coffee maker", "iron", "hair dryer"],
  910. # "attributes": {
  911. # "color": COLORS,
  912. # "type": ["upright", "robot", "handheld", "ceiling", "table", "tower"],
  913. # "power": ["low", "medium", "high", "variable"],
  914. # "brand": BRANDS
  915. # }
  916. # },
  917. # # ==================== BEAUTY & PERSONAL CARE ====================
  918. # "beauty": {
  919. # "products": ["lipstick", "perfume", "lotion", "hair dryer", "makeup",
  920. # "skincare", "nail polish", "shampoo"],
  921. # "attributes": {
  922. # "color": COLORS,
  923. # "type": ["eau de parfum", "eau de toilette", "body spray",
  924. # "body lotion", "face cream"],
  925. # "finish": ["matte", "glossy", "satin", "shimmer"]
  926. # }
  927. # },
  928. # # ==================== TOYS ====================
  929. # "toys": {
  930. # "products": ["doll", "puzzle", "board game", "action figure", "plush toy",
  931. # "toy car", "lego", "building blocks"],
  932. # "attributes": {
  933. # "color": COLORS,
  934. # "age_group": AGE_GROUPS,
  935. # "material": ["plastic", "wood", "fabric", "metal", "foam"],
  936. # "type": ["educational", "plush", "action", "vehicle", "puzzle", "board game"],
  937. # "brand": BRANDS
  938. # }
  939. # },
  940. # # ==================== SPORTS & OUTDOOR ====================
  941. # "sports": {
  942. # "products": ["bicycle", "football", "basketball", "tennis racket",
  943. # "yoga mat", "helmet", "skateboard", "dumbbells", "ball"],
  944. # "attributes": {
  945. # "color": COLORS,
  946. # "material": ["steel", "aluminum", "carbon fiber", "rubber",
  947. # "leather", "synthetic", "foam", "composite"],
  948. # "sport_type": ["football", "basketball", "tennis", "cycling",
  949. # "yoga", "gym", "outdoor", "fitness"],
  950. # "type": ["mountain", "road", "hybrid", "bmx", "indoor", "outdoor"],
  951. # "brand": BRANDS
  952. # }
  953. # },
  954. # # ==================== PET SUPPLIES ====================
  955. # "pet_supplies": {
  956. # "products": ["pet bed", "pet toy", "leash", "pet bowl", "collar",
  957. # "pet carrier"],
  958. # "attributes": {
  959. # "color": COLORS,
  960. # "material": ["fabric", "plastic", "metal", "nylon", "leather"],
  961. # "size": ["small", "medium", "large", "extra large"]
  962. # }
  963. # },
  964. # # ==================== BABY PRODUCTS ====================
  965. # "baby": {
  966. # "products": ["stroller", "baby bottle", "diaper", "crib", "high chair",
  967. # "baby carrier"],
  968. # "attributes": {
  969. # "color": COLORS,
  970. # "material": MATERIALS,
  971. # "type": ["full-size", "umbrella", "jogging", "disposable", "cloth"],
  972. # "age_group": ["newborn", "baby", "toddler"]
  973. # }
  974. # },
  975. # # ==================== TOOLS & HARDWARE ====================
  976. # "tools": {
  977. # "products": ["hammer", "drill", "screwdriver", "wrench", "saw",
  978. # "pliers", "measuring tape", "level"],
  979. # "attributes": {
  980. # "material": ["steel", "aluminum", "plastic", "wood", "rubber",
  981. # "chrome", "fiberglass"],
  982. # "type": ["manual", "electric", "cordless", "corded", "pneumatic"],
  983. # "finish": ["chrome plated", "powder coated", "stainless steel"],
  984. # "brand": BRANDS
  985. # }
  986. # },
  987. # # ==================== BOOKS & MEDIA ====================
  988. # "books_media": {
  989. # "products": ["book", "magazine", "dvd", "video game", "cd", "vinyl"],
  990. # "attributes": {
  991. # "type": ["paperback", "hardcover", "ebook", "audiobook"],
  992. # "genre": ["fiction", "non-fiction", "educational", "kids",
  993. # "action", "adventure", "sports", "rpg"]
  994. # }
  995. # },
  996. # # ==================== AUTOMOTIVE ====================
  997. # "automotive": {
  998. # "products": ["car accessory", "tire", "car seat", "steering wheel cover",
  999. # "floor mat"],
  1000. # "attributes": {
  1001. # "color": COLORS,
  1002. # "material": ["rubber", "plastic", "fabric", "leather", "vinyl"],
  1003. # "type": ["universal", "custom fit"]
  1004. # }
  1005. # },
  1006. # # ==================== OFFICE SUPPLIES ====================
  1007. # "office": {
  1008. # "products": ["pen", "notebook", "folder", "desk organizer", "stapler",
  1009. # "calculator", "paper"],
  1010. # "attributes": {
  1011. # "color": COLORS,
  1012. # "material": ["paper", "plastic", "metal", "cardboard"],
  1013. # "type": ["ruled", "blank", "grid", "dot grid"]
  1014. # }
  1015. # },
  1016. # # ==================== GARDEN & OUTDOOR ====================
  1017. # "garden": {
  1018. # "products": ["plant pot", "garden tool", "watering can", "planter",
  1019. # "garden hose", "lawn mower"],
  1020. # "attributes": {
  1021. # "color": COLORS,
  1022. # "material": ["ceramic", "plastic", "metal", "terracotta", "wood"],
  1023. # "type": ["indoor", "outdoor", "hanging", "standing"]
  1024. # }
  1025. # }
  1026. # }
  1027. # # Attribute-specific confidence thresholds
  1028. # CONFIDENCE_THRESHOLDS = {
  1029. # "color": 0.20,
  1030. # "pattern": 0.25,
  1031. # "material": 0.30,
  1032. # "style": 0.20,
  1033. # "fit": 0.25,
  1034. # "brand": 0.40,
  1035. # "type": 0.22,
  1036. # "finish": 0.28,
  1037. # "neckline": 0.23,
  1038. # "sleeve_type": 0.23
  1039. # }
  1040. # def __init__(self):
  1041. # pass
  1042. # @classmethod
  1043. # def _get_device(cls):
  1044. # """Get optimal device."""
  1045. # if cls._device is None:
  1046. # cls._device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
  1047. # logger.info(f"Visual Processing using device: {cls._device}")
  1048. # return cls._device
  1049. # @classmethod
  1050. # def _get_clip_model(cls):
  1051. # """Lazy load CLIP model with class-level caching."""
  1052. # if cls._clip_model is None:
  1053. # logger.info("Loading CLIP model (this may take a few minutes on first use)...")
  1054. # cls._clip_model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32")
  1055. # cls._clip_processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")
  1056. # device = cls._get_device()
  1057. # cls._clip_model.to(device)
  1058. # cls._clip_model.eval()
  1059. # logger.info("✓ CLIP model loaded successfully")
  1060. # return cls._clip_model, cls._clip_processor
  1061. # def center_crop(self, image: Image.Image, rel_crop: float = 0.7) -> Image.Image:
  1062. # """
  1063. # Center-crop to focus on the product area if there is too much background.
  1064. # Args:
  1065. # image: PIL Image
  1066. # rel_crop: Relative crop size (0.7 = 70% of min dimension)
  1067. # """
  1068. # w, h = image.size
  1069. # side = int(min(w, h) * rel_crop)
  1070. # left = (w - side) // 2
  1071. # top = (h - side) // 2
  1072. # return image.crop((left, top, left + side, top + side))
  1073. # def download_image(self, image_url: str, apply_crop: bool = False,
  1074. # max_size: Tuple[int, int] = (1024, 1024)) -> Optional[Image.Image]:
  1075. # """
  1076. # Download image from URL with optional preprocessing.
  1077. # Args:
  1078. # image_url: URL of the image
  1079. # apply_crop: Whether to apply center crop
  1080. # max_size: Maximum dimensions for resizing
  1081. # """
  1082. # try:
  1083. # response = requests.get(image_url, timeout=10)
  1084. # response.raise_for_status()
  1085. # image = Image.open(BytesIO(response.content)).convert('RGB')
  1086. # # Resize if too large
  1087. # if image.size[0] > max_size[0] or image.size[1] > max_size[1]:
  1088. # image.thumbnail(max_size, Image.Resampling.LANCZOS)
  1089. # # Optional center crop
  1090. # if apply_crop:
  1091. # image = self.center_crop(image, rel_crop=0.7)
  1092. # return image
  1093. # except Exception as e:
  1094. # logger.error(f"Error downloading image from {image_url}: {str(e)}")
  1095. # return None
  1096. # def normalize_color(self, word: str) -> str:
  1097. # """
  1098. # Enhanced color normalization with aliases and modifiers.
  1099. # Args:
  1100. # word: Color word to normalize
  1101. # """
  1102. # w = word.lower().strip()
  1103. # # Remove light/dark modifiers
  1104. # w = w.replace("light ", "").replace("dark ", "")
  1105. # w = w.replace("bright ", "").replace("pale ", "")
  1106. # # Alias mapping
  1107. # aliases = {
  1108. # "grey": "gray",
  1109. # "navy": "blue",
  1110. # "navy blue": "blue",
  1111. # "maroon": "red",
  1112. # "crimson": "red",
  1113. # "scarlet": "red",
  1114. # "teal": "green",
  1115. # "turquoise": "blue",
  1116. # "cyan": "blue",
  1117. # "indigo": "blue",
  1118. # "violet": "purple",
  1119. # "lavender": "purple",
  1120. # "magenta": "pink",
  1121. # "off white": "white",
  1122. # "off-white": "white",
  1123. # "cream": "beige",
  1124. # "ivory": "white",
  1125. # "khaki": "beige",
  1126. # "tan": "brown",
  1127. # "bronze": "brown",
  1128. # "gold": "yellow",
  1129. # "silver": "gray",
  1130. # "charcoal": "gray"
  1131. # }
  1132. # normalized = aliases.get(w, w)
  1133. # # Validate against canonical colors
  1134. # if normalized not in [c.lower() for c in self.COLORS]:
  1135. # # Try first word if it's a compound
  1136. # first_word = normalized.split()[0] if ' ' in normalized else normalized
  1137. # if first_word in [c.lower() for c in self.COLORS]:
  1138. # return first_word
  1139. # return normalized
  1140. # def extract_dominant_colors(self, image: Image.Image, n_colors: int = 3) -> List[Dict]:
  1141. # """Extract dominant colors using K-means clustering."""
  1142. # try:
  1143. # # Resize for faster processing
  1144. # img_small = image.resize((150, 150))
  1145. # img_array = np.array(img_small)
  1146. # pixels = img_array.reshape(-1, 3)
  1147. # # Sample if too many pixels
  1148. # if len(pixels) > 10000:
  1149. # indices = np.random.choice(len(pixels), 10000, replace=False)
  1150. # pixels = pixels[indices]
  1151. # # K-means clustering
  1152. # kmeans = KMeans(n_clusters=n_colors, random_state=42, n_init=5, max_iter=100)
  1153. # kmeans.fit(pixels)
  1154. # colors = []
  1155. # labels_counts = np.bincount(kmeans.labels_)
  1156. # for i, center in enumerate(kmeans.cluster_centers_):
  1157. # rgb = tuple(center.astype(int))
  1158. # color_name = self._get_color_name_simple(rgb)
  1159. # percentage = float(labels_counts[i] / len(kmeans.labels_) * 100)
  1160. # colors.append({
  1161. # "name": color_name,
  1162. # "rgb": rgb,
  1163. # "percentage": percentage
  1164. # })
  1165. # colors.sort(key=lambda x: x['percentage'], reverse=True)
  1166. # return colors
  1167. # except Exception as e:
  1168. # logger.error(f"Error extracting colors: {str(e)}")
  1169. # return []
  1170. # def _get_color_name_simple(self, rgb: Tuple[int, int, int]) -> str:
  1171. # """Simple RGB to color name mapping."""
  1172. # r, g, b = rgb
  1173. # # Define color ranges
  1174. # colors = {
  1175. # 'black': (r < 50 and g < 50 and b < 50),
  1176. # 'white': (r > 200 and g > 200 and b > 200),
  1177. # 'gray': (abs(r - g) < 30 and abs(g - b) < 30 and abs(r - b) < 30 and 50 <= r <= 200),
  1178. # 'red': (r > 150 and g < 100 and b < 100),
  1179. # 'green': (g > 150 and r < 100 and b < 100),
  1180. # 'blue': (b > 150 and r < 100 and g < 100),
  1181. # 'yellow': (r > 200 and g > 200 and b < 100),
  1182. # 'orange': (r > 200 and 100 < g < 200 and b < 100),
  1183. # 'purple': (r > 100 and b > 100 and g < 100),
  1184. # 'pink': (r > 200 and 100 < g < 200 and 100 < b < 200),
  1185. # 'brown': (50 < r < 150 and 30 < g < 100 and b < 80),
  1186. # 'beige': (150 < r < 220 and 140 < g < 200 and 100 < b < 180),
  1187. # }
  1188. # for color_name, condition in colors.items():
  1189. # if condition:
  1190. # return color_name
  1191. # # Fallback based on dominant channel
  1192. # if r > g and r > b:
  1193. # return 'red'
  1194. # elif g > r and g > b:
  1195. # return 'green'
  1196. # elif b > r and b > g:
  1197. # return 'blue'
  1198. # else:
  1199. # return 'gray'
  1200. # def classify_with_clip(
  1201. # self,
  1202. # image: Image.Image,
  1203. # candidates: List[str],
  1204. # attribute_name: str,
  1205. # confidence_threshold: Optional[float] = None
  1206. # ) -> Dict:
  1207. # """
  1208. # Use CLIP to classify image against candidate labels.
  1209. # Args:
  1210. # image: PIL Image
  1211. # candidates: List of text labels to classify against
  1212. # attribute_name: Name of the attribute being classified
  1213. # confidence_threshold: Override default threshold
  1214. # """
  1215. # try:
  1216. # model, processor = self._get_clip_model()
  1217. # device = self._get_device()
  1218. # # Use attribute-specific threshold if not provided
  1219. # if confidence_threshold is None:
  1220. # confidence_threshold = self.CONFIDENCE_THRESHOLDS.get(attribute_name, 0.20)
  1221. # # Prepare inputs
  1222. # inputs = processor(
  1223. # text=candidates,
  1224. # images=image,
  1225. # return_tensors="pt",
  1226. # padding=True
  1227. # )
  1228. # # Move to device
  1229. # inputs = {k: v.to(device) for k, v in inputs.items()}
  1230. # # Get predictions
  1231. # with torch.no_grad():
  1232. # outputs = model(**inputs)
  1233. # logits_per_image = outputs.logits_per_image
  1234. # probs = logits_per_image.softmax(dim=1).cpu()
  1235. # # Get top predictions
  1236. # top_k = min(3, len(candidates))
  1237. # top_probs, top_indices = torch.topk(probs[0], k=top_k)
  1238. # results = []
  1239. # for prob, idx in zip(top_probs, top_indices):
  1240. # if prob.item() > confidence_threshold:
  1241. # value = candidates[idx.item()]
  1242. # # Apply color normalization if color attribute
  1243. # if attribute_name == "color":
  1244. # value = self.normalize_color(value)
  1245. # results.append({
  1246. # "value": value,
  1247. # "confidence": float(prob.item())
  1248. # })
  1249. # return {
  1250. # "attribute": attribute_name,
  1251. # "predictions": results
  1252. # }
  1253. # except Exception as e:
  1254. # logger.error(f"Error in CLIP classification for {attribute_name}: {str(e)}")
  1255. # return {"attribute": attribute_name, "predictions": []}
  1256. # def detect_category_hierarchical(self, image: Image.Image) -> Tuple[str, str, float]:
  1257. # """
  1258. # Two-stage hierarchical product detection:
  1259. # 1. Detect broad category
  1260. # 2. Detect specific product within that category
  1261. # Returns:
  1262. # (category, product_type, confidence)
  1263. # """
  1264. # # Stage 1: Detect broad category
  1265. # category_names = list(self.CATEGORY_ATTRIBUTES.keys())
  1266. # category_labels = [f"a photo of {cat.replace('_', ' ')}" for cat in category_names]
  1267. # category_result = self.classify_with_clip(
  1268. # image, category_labels, "category_detection", confidence_threshold=0.15
  1269. # )
  1270. # if not category_result["predictions"]:
  1271. # return "unknown", "unknown", 0.0
  1272. # # Extract category
  1273. # best_category_match = category_result["predictions"][0]
  1274. # detected_category = category_names[category_labels.index(best_category_match["value"])]
  1275. # category_confidence = best_category_match["confidence"]
  1276. # # Stage 2: Detect specific product within category
  1277. # products_in_category = self.CATEGORY_ATTRIBUTES[detected_category]["products"]
  1278. # product_labels = [f"a photo of a {p}" for p in products_in_category]
  1279. # product_result = self.classify_with_clip(
  1280. # image, product_labels, "product_detection", confidence_threshold=0.15
  1281. # )
  1282. # if product_result["predictions"]:
  1283. # best_product = product_result["predictions"][0]
  1284. # product_type = products_in_category[product_labels.index(best_product["value"])]
  1285. # product_confidence = best_product["confidence"]
  1286. # # Combined confidence (geometric mean for balance)
  1287. # combined_confidence = (category_confidence * product_confidence) ** 0.5
  1288. # logger.info(f"Detected: {detected_category} → {product_type} (confidence: {combined_confidence:.3f})")
  1289. # return detected_category, product_type, combined_confidence
  1290. # return detected_category, "unknown", category_confidence * 0.5
  1291. # def detect_category_flat(self, image: Image.Image) -> Tuple[str, str, float]:
  1292. # """
  1293. # Single-stage flat product detection across all categories.
  1294. # Faster but potentially less accurate.
  1295. # Returns:
  1296. # (category, product_type, confidence)
  1297. # """
  1298. # # Collect all products with their categories
  1299. # all_products = []
  1300. # product_to_category = {}
  1301. # for category, data in self.CATEGORY_ATTRIBUTES.items():
  1302. # for product in data["products"]:
  1303. # label = f"a photo of a {product}"
  1304. # all_products.append(label)
  1305. # product_to_category[label] = category
  1306. # # Classify
  1307. # result = self.classify_with_clip(
  1308. # image, all_products, "product_detection", confidence_threshold=0.15
  1309. # )
  1310. # if result["predictions"]:
  1311. # best_match = result["predictions"][0]
  1312. # product_label = best_match["value"]
  1313. # category = product_to_category[product_label]
  1314. # product_type = product_label.replace("a photo of a ", "")
  1315. # confidence = best_match["confidence"]
  1316. # logger.info(f"Detected: {category} → {product_type} (confidence: {confidence:.3f})")
  1317. # return category, product_type, confidence
  1318. # return "unknown", "unknown", 0.0
  1319. # def process_image(
  1320. # self,
  1321. # image_url: str,
  1322. # product_type_hint: Optional[str] = None,
  1323. # apply_crop: bool = False,
  1324. # detection_mode: str = "hierarchical"
  1325. # ) -> Dict:
  1326. # """
  1327. # Main method to process image and extract visual attributes.
  1328. # Args:
  1329. # image_url: URL of the product image
  1330. # product_type_hint: Optional hint about product type
  1331. # apply_crop: Whether to apply center crop for better focus
  1332. # detection_mode: "hierarchical" (slower, more accurate) or "flat" (faster)
  1333. # """
  1334. # import time
  1335. # start_time = time.time()
  1336. # try:
  1337. # # Download image
  1338. # image = self.download_image(image_url, apply_crop=apply_crop)
  1339. # if image is None:
  1340. # return {
  1341. # "visual_attributes": {},
  1342. # "error": "Failed to download image"
  1343. # }
  1344. # visual_attributes = {}
  1345. # detailed_predictions = {}
  1346. # # Step 1: Detect product category and type
  1347. # if detection_mode == "hierarchical":
  1348. # detected_category, detected_product_type, category_confidence = \
  1349. # self.detect_category_hierarchical(image)
  1350. # else:
  1351. # detected_category, detected_product_type, category_confidence = \
  1352. # self.detect_category_flat(image)
  1353. # # If confidence is too low, return minimal info
  1354. # if category_confidence < 0.12:
  1355. # logger.warning(f"Low confidence ({category_confidence:.3f}). Returning basic attributes only.")
  1356. # colors = self.extract_dominant_colors(image, n_colors=3)
  1357. # if colors:
  1358. # visual_attributes["primary_color"] = colors[0]["name"]
  1359. # visual_attributes["color_palette"] = [c["name"] for c in colors]
  1360. # return {
  1361. # "visual_attributes": visual_attributes,
  1362. # "category_confidence": category_confidence,
  1363. # "processing_time": round(time.time() - start_time, 2),
  1364. # "warning": "Low confidence detection"
  1365. # }
  1366. # # Add detected information
  1367. # visual_attributes["product_type"] = detected_product_type
  1368. # visual_attributes["category"] = detected_category
  1369. # visual_attributes["detection_confidence"] = round(category_confidence, 3)
  1370. # # Step 2: Extract universal color attribute
  1371. # colors = self.extract_dominant_colors(image, n_colors=3)
  1372. # if colors:
  1373. # visual_attributes["primary_color"] = colors[0]["name"]
  1374. # visual_attributes["color_palette"] = [c["name"] for c in colors]
  1375. # visual_attributes["color_distribution"] = [
  1376. # {"name": c["name"], "percentage": round(c["percentage"], 1)}
  1377. # for c in colors
  1378. # ]
  1379. # # Step 3: Extract category-specific attributes
  1380. # if detected_category in self.CATEGORY_ATTRIBUTES:
  1381. # category_config = self.CATEGORY_ATTRIBUTES[detected_category]
  1382. # for attr_name, attr_values in category_config["attributes"].items():
  1383. # # Skip color since we already extracted it
  1384. # if attr_name == "color":
  1385. # continue
  1386. # # Get attribute-specific threshold
  1387. # threshold = self.CONFIDENCE_THRESHOLDS.get(attr_name, 0.20)
  1388. # # Classify
  1389. # result = self.classify_with_clip(
  1390. # image, attr_values, attr_name, confidence_threshold=threshold
  1391. # )
  1392. # detailed_predictions[attr_name] = result
  1393. # # Only add if confidence is reasonable
  1394. # if result["predictions"]:
  1395. # best_prediction = result["predictions"][0]
  1396. # if best_prediction["confidence"] > threshold:
  1397. # visual_attributes[attr_name] = best_prediction["value"]
  1398. # processing_time = time.time() - start_time
  1399. # return {
  1400. # "visual_attributes": visual_attributes,
  1401. # "detailed_predictions": detailed_predictions,
  1402. # "detection_confidence": round(category_confidence, 3),
  1403. # "processing_time": round(processing_time, 2),
  1404. # "metadata": {
  1405. # "detection_mode": detection_mode,
  1406. # "crop_applied": apply_crop,
  1407. # "image_size": image.size
  1408. # }
  1409. # }
  1410. # except Exception as e:
  1411. # logger.error(f"Error processing image: {str(e)}")
  1412. # import traceback
  1413. # traceback.print_exc()
  1414. # return {
  1415. # "visual_attributes": {},
  1416. # "error": str(e),
  1417. # "processing_time": round(time.time() - start_time, 2)
  1418. # }
  1419. # def batch_process_images(
  1420. # self,
  1421. # image_urls: List[str],
  1422. # detection_mode: str = "flat"
  1423. # ) -> List[Dict]:
  1424. # """
  1425. # Process multiple images in batch.
  1426. # Args:
  1427. # image_urls: List of image URLs
  1428. # detection_mode: Detection mode to use
  1429. # """
  1430. # results = []
  1431. # for i, url in enumerate(image_urls):
  1432. # logger.info(f"Processing image {i+1}/{len(image_urls)}: {url}")
  1433. # result = self.process_image(url, detection_mode=detection_mode)
  1434. # results.append(result)
  1435. # return results
  1436. # @classmethod
  1437. # def cleanup_models(cls):
  1438. # """Free up memory by unloading models."""
  1439. # if cls._clip_model is not None:
  1440. # del cls._clip_model
  1441. # del cls._clip_processor
  1442. # cls._clip_model = None
  1443. # cls._clip_processor = None
  1444. # if torch.cuda.is_available():
  1445. # torch.cuda.empty_cache()
  1446. # logger.info("Models unloaded and memory freed")
  1447. # def get_supported_categories(self) -> List[str]:
  1448. # """Get list of all supported product categories."""
  1449. # return list(self.CATEGORY_ATTRIBUTES.keys())
  1450. # def get_category_products(self, category: str) -> List[str]:
  1451. # """Get list of products in a specific category."""
  1452. # return self.CATEGORY_ATTRIBUTES.get(category, {}).get("products", [])
  1453. # def get_category_attributes(self, category: str) -> Dict[str, List[str]]:
  1454. # """Get attribute schema for a specific category."""
  1455. # return self.CATEGORY_ATTRIBUTES.get(category, {}).get("attributes", {})
  1456. # def get_statistics(self) -> Dict:
  1457. # """Get statistics about the taxonomy."""
  1458. # total_products = sum(
  1459. # len(data["products"])
  1460. # for data in self.CATEGORY_ATTRIBUTES.values()
  1461. # )
  1462. # total_attributes = sum(
  1463. # len(data["attributes"])
  1464. # for data in self.CATEGORY_ATTRIBUTES.values()
  1465. # )
  1466. # return {
  1467. # "total_categories": len(self.CATEGORY_ATTRIBUTES),
  1468. # "total_products": total_products,
  1469. # "total_unique_attributes": len(set(
  1470. # attr
  1471. # for data in self.CATEGORY_ATTRIBUTES.values()
  1472. # for attr in data["attributes"].keys()
  1473. # )),
  1474. # "categories": list(self.CATEGORY_ATTRIBUTES.keys())
  1475. # }
  1476. # # ==================== USAGE EXAMPLES ====================
  1477. # def example_basic_usage():
  1478. # """Basic usage example."""
  1479. # print("=== Basic Usage Example ===\n")
  1480. # # Initialize service
  1481. # service = VisualProcessingService()
  1482. # # Process single image (hierarchical mode - more accurate)
  1483. # result = service.process_image(
  1484. # "https://example.com/product.jpg",
  1485. # detection_mode="hierarchical"
  1486. # )
  1487. # print("Product Type:", result["visual_attributes"].get("product_type"))
  1488. # print("Category:", result["visual_attributes"].get("category"))
  1489. # print("Primary Color:", result["visual_attributes"].get("primary_color"))
  1490. # print("Detection Confidence:", result.get("detection_confidence"))
  1491. # print("Processing Time:", result["processing_time"], "seconds")
  1492. # print("\nAll Attributes:")
  1493. # for key, value in result["visual_attributes"].items():
  1494. # print(f" {key}: {value}")
  1495. # def example_fast_mode():
  1496. # """Fast processing mode example."""
  1497. # print("\n=== Fast Mode Example ===\n")
  1498. # service = VisualProcessingService()
  1499. # # Fast mode (flat detection)
  1500. # result = service.process_image(
  1501. # "https://example.com/product.jpg",
  1502. # detection_mode="flat" # Faster, single-stage detection
  1503. # )
  1504. # print("Processing Time:", result["processing_time"], "seconds")
  1505. # print("Detected:", result["visual_attributes"])
  1506. # def example_with_cropping():
  1507. # """Example with center cropping for busy backgrounds."""
  1508. # print("\n=== With Center Cropping ===\n")
  1509. # service = VisualProcessingService()
  1510. # # Apply center crop to focus on product
  1511. # result = service.process_image(
  1512. # "https://example.com/product-with-background.jpg",
  1513. # apply_crop=True, # Enable center cropping
  1514. # detection_mode="hierarchical"
  1515. # )
  1516. # print("Crop Applied:", result["metadata"]["crop_applied"])
  1517. # print("Detected:", result["visual_attributes"])
  1518. # def example_batch_processing():
  1519. # """Batch processing example."""
  1520. # print("\n=== Batch Processing ===\n")
  1521. # service = VisualProcessingService()
  1522. # image_urls = [
  1523. # "https://example.com/product1.jpg",
  1524. # "https://example.com/product2.jpg",
  1525. # "https://example.com/product3.jpg"
  1526. # ]
  1527. # results = service.batch_process_images(image_urls, detection_mode="flat")
  1528. # for i, result in enumerate(results):
  1529. # print(f"\nProduct {i+1}:")
  1530. # print(f" Type: {result['visual_attributes'].get('product_type')}")
  1531. # print(f" Category: {result['visual_attributes'].get('category')}")
  1532. # print(f" Time: {result['processing_time']}s")
  1533. # def example_category_info():
  1534. # """Get information about supported categories."""
  1535. # print("\n=== Category Information ===\n")
  1536. # service = VisualProcessingService()
  1537. # # Get statistics
  1538. # stats = service.get_statistics()
  1539. # print("Statistics:")
  1540. # print(f" Total Categories: {stats['total_categories']}")
  1541. # print(f" Total Products: {stats['total_products']}")
  1542. # print(f" Unique Attributes: {stats['total_unique_attributes']}")
  1543. # # Get all categories
  1544. # categories = service.get_supported_categories()
  1545. # print(f"\nSupported Categories ({len(categories)}):")
  1546. # for cat in categories:
  1547. # products = service.get_category_products(cat)
  1548. # print(f" {cat}: {len(products)} products")
  1549. # # Get attributes for a specific category
  1550. # print("\nClothing Category Attributes:")
  1551. # clothing_attrs = service.get_category_attributes("clothing")
  1552. # for attr, values in clothing_attrs.items():
  1553. # print(f" {attr}: {len(values)} options")
  1554. # def example_detailed_predictions():
  1555. # """Example showing detailed predictions with confidence scores."""
  1556. # print("\n=== Detailed Predictions ===\n")
  1557. # service = VisualProcessingService()
  1558. # result = service.process_image(
  1559. # "https://example.com/product.jpg",
  1560. # detection_mode="hierarchical"
  1561. # )
  1562. # print("Visual Attributes (Best Predictions):")
  1563. # for key, value in result["visual_attributes"].items():
  1564. # print(f" {key}: {value}")
  1565. # print("\nDetailed Predictions (Top 3 for each attribute):")
  1566. # for attr_name, predictions in result.get("detailed_predictions", {}).items():
  1567. # print(f"\n {attr_name}:")
  1568. # for pred in predictions.get("predictions", []):
  1569. # print(f" - {pred['value']}: {pred['confidence']:.3f}")
  1570. # def example_color_distribution():
  1571. # """Example showing color palette extraction."""
  1572. # print("\n=== Color Distribution ===\n")
  1573. # service = VisualProcessingService()
  1574. # result = service.process_image("https://example.com/product.jpg")
  1575. # print("Primary Color:", result["visual_attributes"].get("primary_color"))
  1576. # print("\nColor Palette:")
  1577. # for color in result["visual_attributes"].get("color_palette", []):
  1578. # print(f" - {color}")
  1579. # print("\nColor Distribution:")
  1580. # for color_info in result["visual_attributes"].get("color_distribution", []):
  1581. # print(f" {color_info['name']}: {color_info['percentage']}%")
  1582. # def example_error_handling():
  1583. # """Example showing error handling."""
  1584. # print("\n=== Error Handling ===\n")
  1585. # service = VisualProcessingService()
  1586. # # Invalid URL
  1587. # result = service.process_image("https://invalid-url.com/nonexistent.jpg")
  1588. # if "error" in result:
  1589. # print("Error occurred:", result["error"])
  1590. # else:
  1591. # print("Processing successful")
  1592. # # Low confidence warning
  1593. # result = service.process_image("https://example.com/ambiguous-product.jpg")
  1594. # if "warning" in result:
  1595. # print("Warning:", result["warning"])
  1596. # print("Confidence:", result.get("category_confidence"))
  1597. # def example_cleanup():
  1598. # """Example showing model cleanup."""
  1599. # print("\n=== Model Cleanup ===\n")
  1600. # service = VisualProcessingService()
  1601. # # Process some images
  1602. # result = service.process_image("https://example.com/product.jpg")
  1603. # print("Processed successfully")
  1604. # # Clean up models when done (frees memory)
  1605. # VisualProcessingService.cleanup_models()
  1606. # print("Models cleaned up and memory freed")
  1607. # # ==================== PRODUCTION USAGE ====================
  1608. # def production_example():
  1609. # """
  1610. # Production-ready example with proper error handling and logging.
  1611. # """
  1612. # import logging
  1613. # # Setup logging
  1614. # logging.basicConfig(
  1615. # level=logging.INFO,
  1616. # format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
  1617. # )
  1618. # service = VisualProcessingService()
  1619. # def process_product_image(image_url: str, product_id: str) -> Dict:
  1620. # """
  1621. # Process a product image with full error handling.
  1622. # """
  1623. # try:
  1624. # # Process with hierarchical mode for best accuracy
  1625. # result = service.process_image(
  1626. # image_url,
  1627. # detection_mode="hierarchical",
  1628. # apply_crop=False # Set True if images have busy backgrounds
  1629. # )
  1630. # # Check for errors
  1631. # if "error" in result:
  1632. # logger.error(f"Failed to process {product_id}: {result['error']}")
  1633. # return {
  1634. # "product_id": product_id,
  1635. # "status": "error",
  1636. # "error": result["error"]
  1637. # }
  1638. # # Check confidence
  1639. # confidence = result.get("detection_confidence", 0)
  1640. # if confidence < 0.15:
  1641. # logger.warning(f"Low confidence for {product_id}: {confidence}")
  1642. # return {
  1643. # "product_id": product_id,
  1644. # "status": "low_confidence",
  1645. # "confidence": confidence,
  1646. # "partial_attributes": result["visual_attributes"]
  1647. # }
  1648. # # Success
  1649. # return {
  1650. # "product_id": product_id,
  1651. # "status": "success",
  1652. # "attributes": result["visual_attributes"],
  1653. # "confidence": confidence,
  1654. # "processing_time": result["processing_time"]
  1655. # }
  1656. # except Exception as e:
  1657. # logger.exception(f"Unexpected error processing {product_id}")
  1658. # return {
  1659. # "product_id": product_id,
  1660. # "status": "exception",
  1661. # "error": str(e)
  1662. # }
  1663. # # Process products
  1664. # products = [
  1665. # {"id": "PROD001", "image_url": "https://example.com/tshirt.jpg"},
  1666. # {"id": "PROD002", "image_url": "https://example.com/laptop.jpg"},
  1667. # {"id": "PROD003", "image_url": "https://example.com/chair.jpg"}
  1668. # ]
  1669. # results = []
  1670. # for product in products:
  1671. # result = process_product_image(product["image_url"], product["id"])
  1672. # results.append(result)
  1673. # # Print summary
  1674. # if result["status"] == "success":
  1675. # attrs = result["attributes"]
  1676. # print(f"\n✓ {product['id']} ({result['processing_time']}s):")
  1677. # print(f" Type: {attrs.get('product_type')}")
  1678. # print(f" Category: {attrs.get('category')}")
  1679. # print(f" Color: {attrs.get('primary_color')}")
  1680. # else:
  1681. # print(f"\n✗ {product['id']}: {result['status']}")
  1682. # return results
  1683. # # ==================== MAIN ====================
  1684. # if __name__ == "__main__":
  1685. # # Run examples
  1686. # print("Enhanced Visual Processing Service")
  1687. # print("=" * 60)
  1688. # # Show statistics
  1689. # service = VisualProcessingService()
  1690. # stats = service.get_statistics()
  1691. # print(f"\nTaxonomy Coverage:")
  1692. # print(f" Categories: {stats['total_categories']}")
  1693. # print(f" Products: {stats['total_products']}")
  1694. # print(f" Attributes: {stats['total_unique_attributes']}")
  1695. # print("\n" + "=" * 60)
  1696. # print("Run individual examples by calling the example functions:")
  1697. # print(" - example_basic_usage()")
  1698. # print(" - example_fast_mode()")
  1699. # print(" - example_with_cropping()")
  1700. # print(" - example_batch_processing()")
  1701. # print(" - example_category_info()")
  1702. # print(" - example_detailed_predictions()")
  1703. # print(" - example_color_distribution()")
  1704. # print(" - production_example()")
  1705. # print("=" * 60)