visual_processing_service.py 82 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167116811691170117111721173117411751176117711781179118011811182118311841185118611871188118911901191119211931194119511961197119811991200120112021203120412051206120712081209121012111212121312141215121612171218121912201221122212231224122512261227122812291230123112321233123412351236123712381239124012411242124312441245124612471248124912501251125212531254125512561257125812591260126112621263126412651266126712681269127012711272127312741275127612771278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177717781779178017811782178317841785178617871788178917901791179217931794179517961797179817991800180118021803180418051806180718081809181018111812181318141815181618171818181918201821182218231824182518261827182818291830183118321833183418351836183718381839184018411842184318441845184618471848184918501851185218531854185518561857185818591860186118621863186418651866186718681869187018711872187318741875187618771878187918801881188218831884188518861887188818891890189118921893189418951896189718981899190019011902190319041905190619071908190919101911191219131914191519161917191819191920192119221923192419251926192719281929193019311932193319341935193619371938193919401941194219431944194519461947194819491950195119521953195419551956195719581959196019611962196319641965196619671968196919701971197219731974197519761977197819791980198119821983198419851986198719881989199019911992199319941995199619971998199920002001200220032004200520062007200820092010201120122013201420152016201720182019202020212022202320242025
  1. # # ==================== visual_processing_service.py (FIXED - Dynamic Detection) ====================
  2. # import torch
  3. # import cv2
  4. # import numpy as np
  5. # import requests
  6. # from io import BytesIO
  7. # from PIL import Image
  8. # from typing import Dict, List, Optional, Tuple
  9. # import logging
  10. # from transformers import CLIPProcessor, CLIPModel
  11. # from sklearn.cluster import KMeans
  12. # logger = logging.getLogger(__name__)
  13. # class VisualProcessingService:
  14. # """Service for extracting visual attributes from product images using CLIP."""
  15. # # Class-level caching (shared across instances)
  16. # _clip_model = None
  17. # _clip_processor = None
  18. # _device = None
  19. # # Define category-specific attributes
  20. # CATEGORY_ATTRIBUTES = {
  21. # "clothing": {
  22. # "products": ["t-shirt", "shirt", "dress", "pants", "jeans", "shorts",
  23. # "skirt", "jacket", "coat", "sweater", "hoodie", "top", "blouse"],
  24. # "attributes": {
  25. # "pattern": ["solid color", "striped", "checkered", "graphic print", "floral", "geometric", "plain"],
  26. # "material": ["cotton", "polyester", "denim", "leather", "silk", "wool", "linen", "blend"],
  27. # "style": ["casual", "formal", "sporty", "streetwear", "elegant", "vintage", "bohemian"],
  28. # "fit": ["slim fit", "regular fit", "loose fit", "oversized", "tailored"],
  29. # "neckline": ["crew neck", "v-neck", "round neck", "collar", "scoop neck"],
  30. # "sleeve_type": ["short sleeve", "long sleeve", "sleeveless", "3/4 sleeve"],
  31. # "closure_type": ["button", "zipper", "pull-on", "snap", "tie"]
  32. # }
  33. # },
  34. # "tools": {
  35. # "products": ["screwdriver", "hammer", "wrench", "pliers", "drill", "saw",
  36. # "measuring tape", "level", "chisel", "file"],
  37. # "attributes": {
  38. # "material": ["steel", "aluminum", "plastic", "wood", "rubber", "chrome"],
  39. # "type": ["manual", "electric", "pneumatic", "cordless", "corded"],
  40. # "finish": ["chrome plated", "powder coated", "stainless steel", "painted"],
  41. # "handle_type": ["rubber grip", "plastic", "wooden", "cushioned", "ergonomic"]
  42. # }
  43. # },
  44. # "electronics": {
  45. # "products": ["phone", "laptop", "tablet", "headphones", "speaker", "camera",
  46. # "smartwatch", "charger", "mouse", "keyboard"],
  47. # "attributes": {
  48. # "material": ["plastic", "metal", "glass", "aluminum", "rubber"],
  49. # "style": ["modern", "minimalist", "sleek", "industrial", "vintage"],
  50. # "finish": ["matte", "glossy", "metallic", "textured"],
  51. # "connectivity": ["wireless", "wired", "bluetooth", "USB"]
  52. # }
  53. # },
  54. # "furniture": {
  55. # "products": ["chair", "table", "sofa", "bed", "desk", "shelf", "cabinet",
  56. # "dresser", "bench", "stool"],
  57. # "attributes": {
  58. # "material": ["wood", "metal", "glass", "plastic", "fabric", "leather"],
  59. # "style": ["modern", "traditional", "industrial", "rustic", "contemporary", "vintage"],
  60. # "finish": ["natural wood", "painted", "stained", "laminated", "upholstered"]
  61. # }
  62. # },
  63. # "home_decor": {
  64. # "products": ["painting", "canvas", "wall art", "frame", "vase", "lamp",
  65. # "mirror", "clock", "sculpture", "poster"],
  66. # "attributes": {
  67. # "style": ["modern", "abstract", "traditional", "contemporary", "vintage", "minimalist"],
  68. # "material": ["canvas", "wood", "metal", "glass", "ceramic", "paper"],
  69. # "finish": ["glossy", "matte", "textured", "framed", "gallery wrapped"],
  70. # "theme": ["nature", "geometric", "floral", "landscape", "portrait", "abstract"]
  71. # }
  72. # },
  73. # "kitchen": {
  74. # "products": ["pot", "pan", "knife", "utensil", "plate", "bowl", "cup",
  75. # "appliance", "cutting board", "container"],
  76. # "attributes": {
  77. # "material": ["stainless steel", "aluminum", "ceramic", "glass", "plastic", "wood"],
  78. # "finish": ["non-stick", "stainless", "enameled", "anodized"],
  79. # "type": ["manual", "electric", "dishwasher safe"]
  80. # }
  81. # }
  82. # }
  83. # def __init__(self):
  84. # pass
  85. # @classmethod
  86. # def _get_device(cls):
  87. # """Get optimal device."""
  88. # if cls._device is None:
  89. # cls._device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
  90. # logger.info(f"Visual Processing using device: {cls._device}")
  91. # return cls._device
  92. # @classmethod
  93. # def _get_clip_model(cls):
  94. # """Lazy load CLIP model with class-level caching."""
  95. # if cls._clip_model is None:
  96. # logger.info("Loading CLIP model (this may take a few minutes on first use)...")
  97. # cls._clip_model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32")
  98. # cls._clip_processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")
  99. # device = cls._get_device()
  100. # cls._clip_model.to(device)
  101. # cls._clip_model.eval()
  102. # logger.info("✓ CLIP model loaded successfully")
  103. # return cls._clip_model, cls._clip_processor
  104. # def download_image(self, image_url: str) -> Optional[Image.Image]:
  105. # """Download image from URL."""
  106. # try:
  107. # response = requests.get(image_url, timeout=10)
  108. # response.raise_for_status()
  109. # image = Image.open(BytesIO(response.content)).convert('RGB')
  110. # return image
  111. # except Exception as e:
  112. # logger.error(f"Error downloading image from {image_url}: {str(e)}")
  113. # return None
  114. # def extract_dominant_colors(self, image: Image.Image, n_colors: int = 3) -> List[Dict]:
  115. # """Extract dominant colors using K-means."""
  116. # try:
  117. # # Resize for faster processing
  118. # img_small = image.resize((150, 150))
  119. # img_array = np.array(img_small)
  120. # pixels = img_array.reshape(-1, 3)
  121. # # K-means clustering
  122. # kmeans = KMeans(n_clusters=n_colors, random_state=42, n_init=5)
  123. # kmeans.fit(pixels)
  124. # colors = []
  125. # labels_counts = np.bincount(kmeans.labels_)
  126. # for i, center in enumerate(kmeans.cluster_centers_):
  127. # rgb = tuple(center.astype(int))
  128. # color_name = self._get_color_name_simple(rgb)
  129. # percentage = float(labels_counts[i] / len(kmeans.labels_) * 100)
  130. # colors.append({
  131. # "name": color_name,
  132. # "rgb": rgb,
  133. # "percentage": percentage
  134. # })
  135. # colors.sort(key=lambda x: x['percentage'], reverse=True)
  136. # return colors
  137. # except Exception as e:
  138. # logger.error(f"Error extracting colors: {str(e)}")
  139. # return []
  140. # def _get_color_name_simple(self, rgb: Tuple[int, int, int]) -> str:
  141. # """
  142. # Simple color name detection without webcolors dependency.
  143. # Maps RGB to basic color names.
  144. # """
  145. # r, g, b = rgb
  146. # # Define basic color ranges
  147. # colors = {
  148. # 'black': (r < 50 and g < 50 and b < 50),
  149. # 'white': (r > 200 and g > 200 and b > 200),
  150. # 'gray': (abs(r - g) < 30 and abs(g - b) < 30 and abs(r - b) < 30 and 50 <= r <= 200),
  151. # 'red': (r > 150 and g < 100 and b < 100),
  152. # 'green': (g > 150 and r < 100 and b < 100),
  153. # 'blue': (b > 150 and r < 100 and g < 100),
  154. # 'yellow': (r > 200 and g > 200 and b < 100),
  155. # 'orange': (r > 200 and 100 < g < 200 and b < 100),
  156. # 'purple': (r > 100 and b > 100 and g < 100),
  157. # 'pink': (r > 200 and 100 < g < 200 and 100 < b < 200),
  158. # 'brown': (50 < r < 150 and 30 < g < 100 and b < 80),
  159. # 'cyan': (r < 100 and g > 150 and b > 150),
  160. # }
  161. # for color_name, condition in colors.items():
  162. # if condition:
  163. # return color_name
  164. # # Default fallback
  165. # if r > g and r > b:
  166. # return 'red'
  167. # elif g > r and g > b:
  168. # return 'green'
  169. # elif b > r and b > g:
  170. # return 'blue'
  171. # else:
  172. # return 'gray'
  173. # def classify_with_clip(
  174. # self,
  175. # image: Image.Image,
  176. # candidates: List[str],
  177. # attribute_name: str,
  178. # confidence_threshold: float = 0.15
  179. # ) -> Dict:
  180. # """Use CLIP to classify image against candidate labels."""
  181. # try:
  182. # model, processor = self._get_clip_model()
  183. # device = self._get_device()
  184. # # Prepare inputs
  185. # inputs = processor(
  186. # text=candidates,
  187. # images=image,
  188. # return_tensors="pt",
  189. # padding=True
  190. # )
  191. # # Move to device
  192. # inputs = {k: v.to(device) for k, v in inputs.items()}
  193. # # Get predictions
  194. # with torch.no_grad():
  195. # outputs = model(**inputs)
  196. # logits_per_image = outputs.logits_per_image
  197. # probs = logits_per_image.softmax(dim=1).cpu()
  198. # # Get top predictions
  199. # top_k = min(3, len(candidates))
  200. # top_probs, top_indices = torch.topk(probs[0], k=top_k)
  201. # results = []
  202. # for prob, idx in zip(top_probs, top_indices):
  203. # if prob.item() > confidence_threshold:
  204. # results.append({
  205. # "value": candidates[idx.item()],
  206. # "confidence": float(prob.item())
  207. # })
  208. # return {
  209. # "attribute": attribute_name,
  210. # "predictions": results
  211. # }
  212. # except Exception as e:
  213. # logger.error(f"Error in CLIP classification for {attribute_name}: {str(e)}")
  214. # return {"attribute": attribute_name, "predictions": []}
  215. # def detect_product_category(self, image: Image.Image) -> Tuple[str, float]:
  216. # """
  217. # First detect which category the product belongs to.
  218. # Returns: (category_name, confidence)
  219. # """
  220. # # Get all product types from all categories
  221. # all_categories = []
  222. # category_map = {}
  223. # for category, data in self.CATEGORY_ATTRIBUTES.items():
  224. # for product in data["products"]:
  225. # all_categories.append(f"a photo of a {product}")
  226. # category_map[f"a photo of a {product}"] = category
  227. # # Classify
  228. # result = self.classify_with_clip(image, all_categories, "category_detection", confidence_threshold=0.10)
  229. # if result["predictions"]:
  230. # best_match = result["predictions"][0]
  231. # detected_category = category_map[best_match["value"]]
  232. # product_type = best_match["value"].replace("a photo of a ", "")
  233. # confidence = best_match["confidence"]
  234. # logger.info(f"Detected category: {detected_category}, product: {product_type}, confidence: {confidence:.3f}")
  235. # return detected_category, product_type, confidence
  236. # return "unknown", "unknown", 0.0
  237. # def process_image(
  238. # self,
  239. # image_url: str,
  240. # product_type_hint: Optional[str] = None
  241. # ) -> Dict:
  242. # """
  243. # Main method to process image and extract visual attributes.
  244. # Now dynamically detects product category first.
  245. # """
  246. # import time
  247. # start_time = time.time()
  248. # try:
  249. # # Download image
  250. # image = self.download_image(image_url)
  251. # if image is None:
  252. # return {
  253. # "visual_attributes": {},
  254. # "error": "Failed to download image"
  255. # }
  256. # visual_attributes = {}
  257. # detailed_predictions = {}
  258. # # Step 1: Detect product category
  259. # detected_category, detected_product_type, category_confidence = self.detect_product_category(image)
  260. # # If confidence is too low, return minimal info
  261. # if category_confidence < 0.10:
  262. # logger.warning(f"Low confidence in category detection ({category_confidence:.3f}). Returning basic attributes only.")
  263. # colors = self.extract_dominant_colors(image, n_colors=3)
  264. # if colors:
  265. # visual_attributes["primary_color"] = colors[0]["name"]
  266. # visual_attributes["color_palette"] = [c["name"] for c in colors]
  267. # return {
  268. # "visual_attributes": visual_attributes,
  269. # "category_confidence": category_confidence,
  270. # "processing_time": round(time.time() - start_time, 2)
  271. # }
  272. # # Add detected product type
  273. # visual_attributes["product_type"] = detected_product_type
  274. # visual_attributes["category"] = detected_category
  275. # # Step 2: Extract color (universal attribute)
  276. # colors = self.extract_dominant_colors(image, n_colors=3)
  277. # if colors:
  278. # visual_attributes["primary_color"] = colors[0]["name"]
  279. # visual_attributes["color_palette"] = [c["name"] for c in colors]
  280. # # Step 3: Extract category-specific attributes
  281. # if detected_category in self.CATEGORY_ATTRIBUTES:
  282. # category_config = self.CATEGORY_ATTRIBUTES[detected_category]
  283. # for attr_name, attr_values in category_config["attributes"].items():
  284. # # Use higher confidence threshold for category-specific attributes
  285. # result = self.classify_with_clip(image, attr_values, attr_name, confidence_threshold=0.20)
  286. # if result["predictions"]:
  287. # # Only add if confidence is reasonable
  288. # best_prediction = result["predictions"][0]
  289. # if best_prediction["confidence"] > 0.20:
  290. # visual_attributes[attr_name] = best_prediction["value"]
  291. # detailed_predictions[attr_name] = result
  292. # processing_time = time.time() - start_time
  293. # return {
  294. # "visual_attributes": visual_attributes,
  295. # "detailed_predictions": detailed_predictions,
  296. # "category_confidence": category_confidence,
  297. # "processing_time": round(processing_time, 2)
  298. # }
  299. # except Exception as e:
  300. # logger.error(f"Error processing image: {str(e)}")
  301. # return {
  302. # "visual_attributes": {},
  303. # "error": str(e),
  304. # "processing_time": round(time.time() - start_time, 2)
  305. # }
  306. # ==================== visual_processing_service.py (FIXED - Smart Subcategory Detection) ====================
  307. import torch
  308. import numpy as np
  309. import requests
  310. from io import BytesIO
  311. from PIL import Image
  312. from typing import Dict, List, Optional, Tuple
  313. import logging
  314. from transformers import CLIPProcessor, CLIPModel
  315. from sklearn.cluster import KMeans
  316. logger = logging.getLogger(__name__)
  317. class VisualProcessingService:
  318. """Service for extracting visual attributes from product images using CLIP with smart subcategory detection."""
  319. # Class-level caching (shared across instances)
  320. _clip_model = None
  321. _clip_processor = None
  322. _device = None
  323. # Define hierarchical category structure with subcategories
  324. CATEGORY_ATTRIBUTES = {
  325. "clothing": {
  326. "subcategories": {
  327. "tops": {
  328. "products": ["t-shirt", "shirt", "blouse", "top", "sweater", "hoodie", "tank top", "polo shirt"],
  329. "attributes": {
  330. "pattern": ["solid color", "striped", "checkered", "graphic print", "floral", "geometric", "plain", "logo print"],
  331. "material": ["cotton", "polyester", "silk", "wool", "linen", "blend", "knit"],
  332. "style": ["casual", "formal", "sporty", "streetwear", "elegant", "vintage", "minimalist"],
  333. "fit": ["slim fit", "regular fit", "loose fit", "oversized", "fitted"],
  334. "neckline": ["crew neck", "v-neck", "round neck", "collar", "scoop neck", "henley"],
  335. "sleeve_type": ["short sleeve", "long sleeve", "sleeveless", "3/4 sleeve", "cap sleeve"],
  336. "closure_type": ["button-up", "zipper", "pull-on", "snap button"]
  337. }
  338. },
  339. "bottoms": {
  340. "products": ["jeans", "pants", "trousers", "shorts", "chinos", "cargo pants", "leggings"],
  341. "attributes": {
  342. "pattern": ["solid color", "distressed", "faded", "plain", "washed", "dark wash", "light wash"],
  343. "material": ["denim", "cotton", "polyester", "wool", "blend", "twill", "corduroy"],
  344. "style": ["casual", "formal", "sporty", "vintage", "modern", "workwear"],
  345. "fit": ["slim fit", "regular fit", "loose fit", "skinny", "bootcut", "straight leg", "relaxed fit"],
  346. "rise": ["high rise", "mid rise", "low rise"],
  347. "closure_type": ["button fly", "zipper fly", "elastic waist", "drawstring"],
  348. "length": ["full length", "cropped", "ankle length", "capri"]
  349. }
  350. },
  351. "dresses_skirts": {
  352. "products": ["dress", "skirt", "gown", "sundress", "maxi dress", "mini skirt"],
  353. "attributes": {
  354. "pattern": ["solid color", "floral", "striped", "geometric", "plain", "printed", "polka dot"],
  355. "material": ["cotton", "silk", "polyester", "linen", "blend", "chiffon", "satin"],
  356. "style": ["casual", "formal", "cocktail", "bohemian", "vintage", "elegant", "party"],
  357. "fit": ["fitted", "loose", "a-line", "bodycon", "flowy", "wrap"],
  358. "neckline": ["crew neck", "v-neck", "scoop neck", "halter", "off-shoulder", "sweetheart"],
  359. "sleeve_type": ["short sleeve", "long sleeve", "sleeveless", "3/4 sleeve", "flutter sleeve"],
  360. "length": ["mini", "midi", "maxi", "knee-length", "floor-length"]
  361. }
  362. },
  363. "outerwear": {
  364. "products": ["jacket", "coat", "blazer", "windbreaker", "parka", "bomber jacket", "denim jacket"],
  365. "attributes": {
  366. "pattern": ["solid color", "plain", "quilted", "textured"],
  367. "material": ["leather", "denim", "wool", "polyester", "cotton", "nylon", "fleece"],
  368. "style": ["casual", "formal", "sporty", "vintage", "military", "biker"],
  369. "fit": ["slim fit", "regular fit", "oversized", "cropped"],
  370. "closure_type": ["zipper", "button", "snap button", "toggle"],
  371. "length": ["cropped", "hip length", "thigh length", "knee length"]
  372. }
  373. }
  374. }
  375. },
  376. "footwear": {
  377. "products": ["sneakers", "boots", "sandals", "heels", "loafers", "flats", "slippers"],
  378. "attributes": {
  379. "material": ["leather", "canvas", "suede", "synthetic", "rubber", "mesh"],
  380. "style": ["casual", "formal", "athletic", "vintage", "modern"],
  381. "closure_type": ["lace-up", "slip-on", "velcro", "buckle", "zipper"],
  382. "toe_style": ["round toe", "pointed toe", "square toe", "open toe", "closed toe"]
  383. }
  384. },
  385. "tools": {
  386. "products": ["screwdriver", "hammer", "wrench", "pliers", "drill", "saw", "measuring tape"],
  387. "attributes": {
  388. "material": ["steel", "aluminum", "plastic", "rubber", "chrome", "iron"],
  389. "type": ["manual", "electric", "pneumatic", "cordless", "corded"],
  390. "finish": ["chrome plated", "powder coated", "stainless steel", "painted"],
  391. "handle_type": ["rubber grip", "plastic", "wooden", "ergonomic", "cushioned"]
  392. }
  393. },
  394. "electronics": {
  395. "products": ["phone", "laptop", "tablet", "headphones", "speaker", "camera", "smartwatch", "earbuds"],
  396. "attributes": {
  397. "material": ["plastic", "metal", "glass", "aluminum", "rubber", "silicone"],
  398. "style": ["modern", "minimalist", "sleek", "industrial", "vintage"],
  399. "finish": ["matte", "glossy", "metallic", "textured", "transparent"],
  400. "connectivity": ["wireless", "wired", "bluetooth", "USB-C", "USB"]
  401. }
  402. },
  403. "furniture": {
  404. "products": ["chair", "table", "sofa", "bed", "desk", "shelf", "cabinet", "bench"],
  405. "attributes": {
  406. "material": ["wood", "metal", "glass", "plastic", "fabric", "leather", "rattan"],
  407. "style": ["modern", "traditional", "industrial", "rustic", "contemporary", "vintage", "scandinavian"],
  408. "finish": ["natural wood", "painted", "stained", "laminated", "upholstered", "polished"]
  409. }
  410. }
  411. }
  412. def __init__(self):
  413. pass
  414. @classmethod
  415. def _get_device(cls):
  416. """Get optimal device."""
  417. if cls._device is None:
  418. cls._device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
  419. logger.info(f"Visual Processing using device: {cls._device}")
  420. return cls._device
  421. @classmethod
  422. def _get_clip_model(cls):
  423. """Lazy load CLIP model with class-level caching."""
  424. if cls._clip_model is None:
  425. logger.info("Loading CLIP model (this may take a few minutes on first use)...")
  426. cls._clip_model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32")
  427. cls._clip_processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")
  428. device = cls._get_device()
  429. cls._clip_model.to(device)
  430. cls._clip_model.eval()
  431. logger.info("✓ CLIP model loaded successfully")
  432. return cls._clip_model, cls._clip_processor
  433. def download_image(self, image_url: str) -> Optional[Image.Image]:
  434. """Download image from URL."""
  435. try:
  436. response = requests.get(image_url, timeout=10)
  437. response.raise_for_status()
  438. image = Image.open(BytesIO(response.content)).convert('RGB')
  439. return image
  440. except Exception as e:
  441. logger.error(f"Error downloading image from {image_url}: {str(e)}")
  442. return None
  443. def extract_dominant_colors(self, image: Image.Image, n_colors: int = 3) -> List[Dict]:
  444. """Extract dominant colors using K-means clustering."""
  445. try:
  446. # Resize for faster processing
  447. img_small = image.resize((150, 150))
  448. img_array = np.array(img_small)
  449. pixels = img_array.reshape(-1, 3)
  450. # K-means clustering
  451. kmeans = KMeans(n_clusters=n_colors, random_state=42, n_init=5)
  452. kmeans.fit(pixels)
  453. colors = []
  454. labels_counts = np.bincount(kmeans.labels_)
  455. for i, center in enumerate(kmeans.cluster_centers_):
  456. rgb = tuple(center.astype(int))
  457. color_name = self._get_color_name_simple(rgb)
  458. percentage = float(labels_counts[i] / len(kmeans.labels_) * 100)
  459. colors.append({
  460. "name": color_name,
  461. "rgb": rgb,
  462. "percentage": round(percentage, 2)
  463. })
  464. # Sort by percentage (most dominant first)
  465. colors.sort(key=lambda x: x['percentage'], reverse=True)
  466. return colors
  467. except Exception as e:
  468. logger.error(f"Error extracting colors: {str(e)}")
  469. return []
  470. def _get_color_name_simple(self, rgb: Tuple[int, int, int]) -> str:
  471. """Map RGB values to basic color names."""
  472. r, g, b = rgb
  473. # Define color ranges with priorities
  474. colors = {
  475. 'black': (r < 50 and g < 50 and b < 50),
  476. 'white': (r > 200 and g > 200 and b > 200),
  477. 'gray': (abs(r - g) < 30 and abs(g - b) < 30 and abs(r - b) < 30 and 50 <= r <= 200),
  478. 'red': (r > 150 and g < 100 and b < 100),
  479. 'green': (g > 150 and r < 100 and b < 100),
  480. 'blue': (b > 150 and r < 100 and g < 100),
  481. 'yellow': (r > 200 and g > 200 and b < 100),
  482. 'orange': (r > 200 and 100 < g < 200 and b < 100),
  483. 'purple': (r > 100 and b > 100 and g < 100),
  484. 'pink': (r > 200 and 100 < g < 200 and 100 < b < 200),
  485. 'brown': (50 < r < 150 and 30 < g < 100 and b < 80),
  486. 'cyan': (r < 100 and g > 150 and b > 150),
  487. 'beige': (180 < r < 240 and 160 < g < 220 and 120 < b < 180),
  488. }
  489. for color_name, condition in colors.items():
  490. if condition:
  491. return color_name
  492. # Fallback to dominant channel
  493. if r > g and r > b:
  494. return 'red'
  495. elif g > r and g > b:
  496. return 'green'
  497. elif b > r and b > g:
  498. return 'blue'
  499. else:
  500. return 'gray'
  501. def classify_with_clip(
  502. self,
  503. image: Image.Image,
  504. candidates: List[str],
  505. attribute_name: str,
  506. confidence_threshold: float = 0.15
  507. ) -> Dict:
  508. """Use CLIP to classify image against candidate labels."""
  509. try:
  510. model, processor = self._get_clip_model()
  511. device = self._get_device()
  512. # Prepare inputs
  513. inputs = processor(
  514. text=candidates,
  515. images=image,
  516. return_tensors="pt",
  517. padding=True
  518. )
  519. # Move to device
  520. inputs = {k: v.to(device) for k, v in inputs.items()}
  521. # Get predictions
  522. with torch.no_grad():
  523. outputs = model(**inputs)
  524. logits_per_image = outputs.logits_per_image
  525. probs = logits_per_image.softmax(dim=1).cpu()
  526. # Get top predictions
  527. top_k = min(3, len(candidates))
  528. top_probs, top_indices = torch.topk(probs[0], k=top_k)
  529. results = []
  530. for prob, idx in zip(top_probs, top_indices):
  531. if prob.item() > confidence_threshold:
  532. results.append({
  533. "value": candidates[idx.item()],
  534. "confidence": round(float(prob.item()), 3)
  535. })
  536. return {
  537. "attribute": attribute_name,
  538. "predictions": results
  539. }
  540. except Exception as e:
  541. logger.error(f"Error in CLIP classification for {attribute_name}: {str(e)}")
  542. return {"attribute": attribute_name, "predictions": []}
  543. def detect_category_and_subcategory(self, image: Image.Image) -> Tuple[str, str, str, float]:
  544. """
  545. Hierarchically detect category, subcategory, and specific product.
  546. Returns: (category, subcategory, product_type, confidence)
  547. """
  548. # Step 1: Detect if it's clothing or something else
  549. main_categories = list(self.CATEGORY_ATTRIBUTES.keys())
  550. category_prompts = [f"a photo of {cat}" for cat in main_categories]
  551. result = self.classify_with_clip(image, category_prompts, "main_category", confidence_threshold=0.10)
  552. if not result["predictions"]:
  553. return "unknown", "unknown", "unknown", 0.0
  554. detected_category = result["predictions"][0]["value"].replace("a photo of ", "")
  555. category_confidence = result["predictions"][0]["confidence"]
  556. logger.info(f"Step 1 - Main category detected: {detected_category} (confidence: {category_confidence:.3f})")
  557. # Step 2: For clothing, detect subcategory (tops/bottoms/dresses/outerwear)
  558. if detected_category == "clothing":
  559. subcategories = self.CATEGORY_ATTRIBUTES["clothing"]["subcategories"]
  560. # Collect all products grouped by subcategory
  561. all_products = []
  562. product_to_subcategory = {}
  563. for subcat, subcat_data in subcategories.items():
  564. for product in subcat_data["products"]:
  565. prompt = f"a photo of {product}"
  566. all_products.append(prompt)
  567. product_to_subcategory[prompt] = subcat
  568. # Step 3: Detect specific product type
  569. product_result = self.classify_with_clip(
  570. image,
  571. all_products,
  572. "product_type",
  573. confidence_threshold=0.12
  574. )
  575. if product_result["predictions"]:
  576. best_match = product_result["predictions"][0]
  577. product_prompt = best_match["value"]
  578. product_type = product_prompt.replace("a photo of ", "")
  579. subcategory = product_to_subcategory[product_prompt]
  580. product_confidence = best_match["confidence"]
  581. logger.info(f"Step 2 - Detected: {subcategory} > {product_type} (confidence: {product_confidence:.3f})")
  582. return detected_category, subcategory, product_type, product_confidence
  583. else:
  584. logger.warning("Could not detect specific product type for clothing")
  585. return detected_category, "unknown", "unknown", category_confidence
  586. # Step 3: For non-clothing categories, just detect product type
  587. else:
  588. category_data = self.CATEGORY_ATTRIBUTES[detected_category]
  589. # Check if this category has subcategories or direct products
  590. if "products" in category_data:
  591. products = category_data["products"]
  592. product_prompts = [f"a photo of {p}" for p in products]
  593. product_result = self.classify_with_clip(
  594. image,
  595. product_prompts,
  596. "product_type",
  597. confidence_threshold=0.12
  598. )
  599. if product_result["predictions"]:
  600. best_match = product_result["predictions"][0]
  601. product_type = best_match["value"].replace("a photo of ", "")
  602. logger.info(f"Step 2 - Detected: {detected_category} > {product_type}")
  603. return detected_category, "none", product_type, best_match["confidence"]
  604. return detected_category, "unknown", "unknown", category_confidence
  605. def process_image(
  606. self,
  607. image_url: str,
  608. product_type_hint: Optional[str] = None
  609. ) -> Dict:
  610. """
  611. Main method to process image and extract visual attributes.
  612. Uses hierarchical detection to extract only relevant attributes.
  613. """
  614. import time
  615. start_time = time.time()
  616. try:
  617. # Download image
  618. image = self.download_image(image_url)
  619. if image is None:
  620. return {
  621. "visual_attributes": {},
  622. "error": "Failed to download image"
  623. }
  624. visual_attributes = {}
  625. detailed_predictions = {}
  626. # Step 1: Detect category, subcategory, and product type
  627. category, subcategory, product_type, confidence = self.detect_category_and_subcategory(image)
  628. # Low confidence check
  629. if confidence < 0.10:
  630. logger.warning(f"Low confidence in detection ({confidence:.3f}). Returning basic attributes only.")
  631. colors = self.extract_dominant_colors(image, n_colors=3)
  632. if colors:
  633. visual_attributes["primary_color"] = colors[0]["name"]
  634. visual_attributes["color_palette"] = [c["name"] for c in colors]
  635. return {
  636. "visual_attributes": visual_attributes,
  637. "detection_confidence": confidence,
  638. "warning": "Low confidence detection",
  639. "processing_time": round(time.time() - start_time, 2)
  640. }
  641. # Add detected metadata
  642. visual_attributes["product_type"] = product_type
  643. visual_attributes["category"] = category
  644. if subcategory != "none" and subcategory != "unknown":
  645. visual_attributes["subcategory"] = subcategory
  646. # Step 2: Extract color information (universal)
  647. colors = self.extract_dominant_colors(image, n_colors=3)
  648. if colors:
  649. visual_attributes["primary_color"] = colors[0]["name"]
  650. visual_attributes["color_palette"] = [c["name"] for c in colors[:3]]
  651. visual_attributes["color_distribution"] = [
  652. {"color": c["name"], "percentage": c["percentage"]}
  653. for c in colors
  654. ]
  655. # Step 3: Get the right attribute configuration based on subcategory
  656. attributes_config = None
  657. if category == "clothing":
  658. if subcategory in self.CATEGORY_ATTRIBUTES["clothing"]["subcategories"]:
  659. attributes_config = self.CATEGORY_ATTRIBUTES["clothing"]["subcategories"][subcategory]["attributes"]
  660. logger.info(f"Using attributes for subcategory: {subcategory}")
  661. else:
  662. logger.warning(f"Unknown subcategory: {subcategory}. Skipping attribute extraction.")
  663. elif category in self.CATEGORY_ATTRIBUTES:
  664. if "attributes" in self.CATEGORY_ATTRIBUTES[category]:
  665. attributes_config = self.CATEGORY_ATTRIBUTES[category]["attributes"]
  666. logger.info(f"Using attributes for category: {category}")
  667. # Step 4: Extract category-specific attributes
  668. if attributes_config:
  669. for attr_name, attr_values in attributes_config.items():
  670. result = self.classify_with_clip(
  671. image,
  672. attr_values,
  673. attr_name,
  674. confidence_threshold=0.20
  675. )
  676. if result["predictions"]:
  677. best_prediction = result["predictions"][0]
  678. # Only add attributes with reasonable confidence
  679. if best_prediction["confidence"] > 0.20:
  680. visual_attributes[attr_name] = best_prediction["value"]
  681. # Store detailed predictions for debugging
  682. detailed_predictions[attr_name] = result
  683. processing_time = time.time() - start_time
  684. logger.info(f"✓ Processing complete in {processing_time:.2f}s. Extracted {len(visual_attributes)} attributes.")
  685. return {
  686. "visual_attributes": visual_attributes,
  687. "detailed_predictions": detailed_predictions,
  688. "detection_confidence": confidence,
  689. "processing_time": round(processing_time, 2)
  690. }
  691. except Exception as e:
  692. logger.error(f"Error processing image: {str(e)}")
  693. return {
  694. "visual_attributes": {},
  695. "error": str(e),
  696. "processing_time": round(time.time() - start_time, 2)
  697. }
  698. # # ==================== visual_processing_service_enhanced.py ====================
  699. # """
  700. # Enhanced Visual Processing Service combining CLIP's speed with BLIP-2's comprehensive taxonomy.
  701. # Features:
  702. # - Fast CLIP-based classification
  703. # - 70+ product categories across multiple domains
  704. # - Two-stage classification with validation
  705. # - Enhanced color normalization
  706. # - Category-specific attribute detection
  707. # - Confidence-based fallback mechanisms
  708. # - Optional center cropping for better focus
  709. # Usage:
  710. # service = VisualProcessingService()
  711. # result = service.process_image("https://example.com/product.jpg")
  712. # """
  713. # import torch
  714. # import cv2
  715. # import numpy as np
  716. # import requests
  717. # from io import BytesIO
  718. # from PIL import Image
  719. # from typing import Dict, List, Optional, Tuple
  720. # import logging
  721. # from transformers import CLIPProcessor, CLIPModel
  722. # from sklearn.cluster import KMeans
  723. # logger = logging.getLogger(__name__)
  724. # class VisualProcessingService:
  725. # """Enhanced service for extracting visual attributes from product images using CLIP."""
  726. # # Class-level caching (shared across instances)
  727. # _clip_model = None
  728. # _clip_processor = None
  729. # _device = None
  730. # # ==================== EXPANDED TAXONOMY ====================
  731. # # Base color vocabulary
  732. # COLORS = ["black", "white", "red", "blue", "green", "yellow", "gray",
  733. # "brown", "pink", "purple", "orange", "beige", "navy", "teal"]
  734. # # Pattern vocabulary
  735. # PATTERNS = ["solid", "striped", "checked", "plaid", "floral", "graphic",
  736. # "polka dot", "camo", "tie-dye", "abstract", "geometric"]
  737. # # Material vocabulary (extended)
  738. # MATERIALS = ["cotton", "polyester", "denim", "leather", "wool", "canvas",
  739. # "silicone", "metal", "fabric", "rubber", "plastic", "wood",
  740. # "glass", "ceramic", "steel", "foam", "aluminum", "carbon fiber"]
  741. # # Style vocabulary
  742. # STYLES = ["casual", "formal", "sporty", "streetwear", "elegant", "vintage",
  743. # "modern", "bohemian", "minimalist", "industrial", "rustic", "contemporary"]
  744. # # Fit vocabulary
  745. # FITS = ["slim fit", "regular fit", "loose fit", "oversized", "tailored",
  746. # "relaxed", "athletic fit"]
  747. # # Brand vocabulary (common marketplace brands)
  748. # BRANDS = ["nike", "adidas", "sony", "samsung", "apple", "generic", "lego",
  749. # "hasbro", "lg", "panasonic", "microsoft"]
  750. # # Age group vocabulary
  751. # AGE_GROUPS = ["baby", "toddler", "child", "teen", "adult", "all ages"]
  752. # # Comprehensive category-specific attributes
  753. # CATEGORY_ATTRIBUTES = {
  754. # # ==================== CLOTHING ====================
  755. # "clothing": {
  756. # "products": ["t-shirt", "shirt", "dress", "pants", "jeans", "shorts",
  757. # "skirt", "jacket", "coat", "sweater", "hoodie", "top",
  758. # "blouse", "cardigan", "blazer"],
  759. # "attributes": {
  760. # "color": COLORS,
  761. # "pattern": PATTERNS,
  762. # "material": ["cotton", "polyester", "denim", "leather", "silk",
  763. # "wool", "linen", "blend", "canvas"],
  764. # "style": STYLES,
  765. # "fit": FITS,
  766. # "neckline": ["crew neck", "v-neck", "round neck", "collar",
  767. # "scoop neck", "boat neck", "turtleneck"],
  768. # "sleeve_type": ["short sleeve", "long sleeve", "sleeveless",
  769. # "3/4 sleeve", "cap sleeve"],
  770. # "closure_type": ["button", "zipper", "pull-on", "snap", "tie", "buckle"]
  771. # }
  772. # },
  773. # # ==================== FOOTWEAR ====================
  774. # "footwear": {
  775. # "products": ["shoes", "sneakers", "sandals", "boots", "slippers",
  776. # "heels", "loafers"],
  777. # "attributes": {
  778. # "color": COLORS,
  779. # "material": ["leather", "synthetic", "canvas", "rubber", "suede", "fabric"],
  780. # "type": ["sneakers", "sandals", "formal", "boots", "sports", "casual"],
  781. # "style": STYLES,
  782. # "closure_type": ["lace-up", "slip-on", "velcro", "zipper", "buckle"]
  783. # }
  784. # },
  785. # # ==================== ACCESSORIES ====================
  786. # "accessories": {
  787. # "products": ["watch", "bag", "backpack", "handbag", "wallet", "belt",
  788. # "sunglasses", "hat", "scarf"],
  789. # "attributes": {
  790. # "color": COLORS,
  791. # "material": ["leather", "fabric", "metal", "plastic", "canvas", "synthetic"],
  792. # "style": STYLES,
  793. # "type": ["backpack", "tote", "crossbody", "messenger", "duffel"]
  794. # }
  795. # },
  796. # # ==================== JEWELRY ====================
  797. # "jewelry": {
  798. # "products": ["necklace", "ring", "bracelet", "earrings", "pendant", "chain"],
  799. # "attributes": {
  800. # "material": ["gold", "silver", "platinum", "stainless steel",
  801. # "plastic", "beads", "leather"],
  802. # "style": ["modern", "vintage", "minimalist", "statement", "elegant"],
  803. # "type": ["chain", "band", "solitaire", "hoop", "stud"]
  804. # }
  805. # },
  806. # # ==================== ELECTRONICS ====================
  807. # "electronics": {
  808. # "products": ["phone", "smartphone", "tablet", "laptop", "headphones",
  809. # "camera", "tv", "monitor", "keyboard", "mouse", "speaker",
  810. # "smartwatch", "charger"],
  811. # "attributes": {
  812. # "color": COLORS,
  813. # "material": ["plastic", "metal", "glass", "aluminum", "rubber"],
  814. # "style": ["modern", "minimalist", "sleek", "industrial"],
  815. # "finish": ["matte", "glossy", "metallic", "textured"],
  816. # "type": ["over-ear", "in-ear", "on-ear", "wireless", "wired"],
  817. # "brand": BRANDS
  818. # }
  819. # },
  820. # # ==================== FURNITURE ====================
  821. # "furniture": {
  822. # "products": ["chair", "table", "sofa", "bed", "desk", "shelf",
  823. # "cabinet", "dresser", "bench", "stool", "bookshelf"],
  824. # "attributes": {
  825. # "color": COLORS,
  826. # "material": ["wood", "metal", "glass", "plastic", "fabric", "leather"],
  827. # "style": ["modern", "traditional", "industrial", "rustic",
  828. # "contemporary", "vintage", "minimalist"],
  829. # "finish": ["natural wood", "painted", "stained", "laminated", "upholstered"]
  830. # }
  831. # },
  832. # # ==================== HOME DECOR ====================
  833. # "home_decor": {
  834. # "products": ["painting", "canvas", "wall art", "frame", "vase", "lamp",
  835. # "mirror", "clock", "sculpture", "poster", "cushion", "rug"],
  836. # "attributes": {
  837. # "color": COLORS,
  838. # "style": ["modern", "abstract", "traditional", "contemporary",
  839. # "vintage", "minimalist", "bohemian"],
  840. # "material": ["canvas", "wood", "metal", "glass", "ceramic", "paper", "fabric"],
  841. # "finish": ["glossy", "matte", "textured", "framed"],
  842. # "theme": ["nature", "geometric", "floral", "landscape", "abstract"]
  843. # }
  844. # },
  845. # # ==================== KITCHEN ====================
  846. # "kitchen": {
  847. # "products": ["pot", "pan", "knife", "utensil", "plate", "bowl", "cup",
  848. # "mug", "bottle", "container", "cutting board"],
  849. # "attributes": {
  850. # "material": ["stainless steel", "aluminum", "ceramic", "glass",
  851. # "plastic", "wood", "silicone"],
  852. # "finish": ["non-stick", "stainless", "enameled", "anodized"],
  853. # "type": ["frypan", "saucepan", "chef knife", "utility", "mixing"]
  854. # }
  855. # },
  856. # # ==================== APPLIANCES ====================
  857. # "appliances": {
  858. # "products": ["microwave", "blender", "vacuum", "fan", "toaster",
  859. # "coffee maker", "iron", "hair dryer"],
  860. # "attributes": {
  861. # "color": COLORS,
  862. # "type": ["upright", "robot", "handheld", "ceiling", "table", "tower"],
  863. # "power": ["low", "medium", "high", "variable"],
  864. # "brand": BRANDS
  865. # }
  866. # },
  867. # # ==================== BEAUTY & PERSONAL CARE ====================
  868. # "beauty": {
  869. # "products": ["lipstick", "perfume", "lotion", "hair dryer", "makeup",
  870. # "skincare", "nail polish", "shampoo"],
  871. # "attributes": {
  872. # "color": COLORS,
  873. # "type": ["eau de parfum", "eau de toilette", "body spray",
  874. # "body lotion", "face cream"],
  875. # "finish": ["matte", "glossy", "satin", "shimmer"]
  876. # }
  877. # },
  878. # # ==================== TOYS ====================
  879. # "toys": {
  880. # "products": ["doll", "puzzle", "board game", "action figure", "plush toy",
  881. # "toy car", "lego", "building blocks"],
  882. # "attributes": {
  883. # "color": COLORS,
  884. # "age_group": AGE_GROUPS,
  885. # "material": ["plastic", "wood", "fabric", "metal", "foam"],
  886. # "type": ["educational", "plush", "action", "vehicle", "puzzle", "board game"],
  887. # "brand": BRANDS
  888. # }
  889. # },
  890. # # ==================== SPORTS & OUTDOOR ====================
  891. # "sports": {
  892. # "products": ["bicycle", "football", "basketball", "tennis racket",
  893. # "yoga mat", "helmet", "skateboard", "dumbbells", "ball"],
  894. # "attributes": {
  895. # "color": COLORS,
  896. # "material": ["steel", "aluminum", "carbon fiber", "rubber",
  897. # "leather", "synthetic", "foam", "composite"],
  898. # "sport_type": ["football", "basketball", "tennis", "cycling",
  899. # "yoga", "gym", "outdoor", "fitness"],
  900. # "type": ["mountain", "road", "hybrid", "bmx", "indoor", "outdoor"],
  901. # "brand": BRANDS
  902. # }
  903. # },
  904. # # ==================== PET SUPPLIES ====================
  905. # "pet_supplies": {
  906. # "products": ["pet bed", "pet toy", "leash", "pet bowl", "collar",
  907. # "pet carrier"],
  908. # "attributes": {
  909. # "color": COLORS,
  910. # "material": ["fabric", "plastic", "metal", "nylon", "leather"],
  911. # "size": ["small", "medium", "large", "extra large"]
  912. # }
  913. # },
  914. # # ==================== BABY PRODUCTS ====================
  915. # "baby": {
  916. # "products": ["stroller", "baby bottle", "diaper", "crib", "high chair",
  917. # "baby carrier"],
  918. # "attributes": {
  919. # "color": COLORS,
  920. # "material": MATERIALS,
  921. # "type": ["full-size", "umbrella", "jogging", "disposable", "cloth"],
  922. # "age_group": ["newborn", "baby", "toddler"]
  923. # }
  924. # },
  925. # # ==================== TOOLS & HARDWARE ====================
  926. # "tools": {
  927. # "products": ["hammer", "drill", "screwdriver", "wrench", "saw",
  928. # "pliers", "measuring tape", "level"],
  929. # "attributes": {
  930. # "material": ["steel", "aluminum", "plastic", "wood", "rubber",
  931. # "chrome", "fiberglass"],
  932. # "type": ["manual", "electric", "cordless", "corded", "pneumatic"],
  933. # "finish": ["chrome plated", "powder coated", "stainless steel"],
  934. # "brand": BRANDS
  935. # }
  936. # },
  937. # # ==================== BOOKS & MEDIA ====================
  938. # "books_media": {
  939. # "products": ["book", "magazine", "dvd", "video game", "cd", "vinyl"],
  940. # "attributes": {
  941. # "type": ["paperback", "hardcover", "ebook", "audiobook"],
  942. # "genre": ["fiction", "non-fiction", "educational", "kids",
  943. # "action", "adventure", "sports", "rpg"]
  944. # }
  945. # },
  946. # # ==================== AUTOMOTIVE ====================
  947. # "automotive": {
  948. # "products": ["car accessory", "tire", "car seat", "steering wheel cover",
  949. # "floor mat"],
  950. # "attributes": {
  951. # "color": COLORS,
  952. # "material": ["rubber", "plastic", "fabric", "leather", "vinyl"],
  953. # "type": ["universal", "custom fit"]
  954. # }
  955. # },
  956. # # ==================== OFFICE SUPPLIES ====================
  957. # "office": {
  958. # "products": ["pen", "notebook", "folder", "desk organizer", "stapler",
  959. # "calculator", "paper"],
  960. # "attributes": {
  961. # "color": COLORS,
  962. # "material": ["paper", "plastic", "metal", "cardboard"],
  963. # "type": ["ruled", "blank", "grid", "dot grid"]
  964. # }
  965. # },
  966. # # ==================== GARDEN & OUTDOOR ====================
  967. # "garden": {
  968. # "products": ["plant pot", "garden tool", "watering can", "planter",
  969. # "garden hose", "lawn mower"],
  970. # "attributes": {
  971. # "color": COLORS,
  972. # "material": ["ceramic", "plastic", "metal", "terracotta", "wood"],
  973. # "type": ["indoor", "outdoor", "hanging", "standing"]
  974. # }
  975. # }
  976. # }
  977. # # Attribute-specific confidence thresholds
  978. # CONFIDENCE_THRESHOLDS = {
  979. # "color": 0.20,
  980. # "pattern": 0.25,
  981. # "material": 0.30,
  982. # "style": 0.20,
  983. # "fit": 0.25,
  984. # "brand": 0.40,
  985. # "type": 0.22,
  986. # "finish": 0.28,
  987. # "neckline": 0.23,
  988. # "sleeve_type": 0.23
  989. # }
  990. # def __init__(self):
  991. # pass
  992. # @classmethod
  993. # def _get_device(cls):
  994. # """Get optimal device."""
  995. # if cls._device is None:
  996. # cls._device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
  997. # logger.info(f"Visual Processing using device: {cls._device}")
  998. # return cls._device
  999. # @classmethod
  1000. # def _get_clip_model(cls):
  1001. # """Lazy load CLIP model with class-level caching."""
  1002. # if cls._clip_model is None:
  1003. # logger.info("Loading CLIP model (this may take a few minutes on first use)...")
  1004. # cls._clip_model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32")
  1005. # cls._clip_processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")
  1006. # device = cls._get_device()
  1007. # cls._clip_model.to(device)
  1008. # cls._clip_model.eval()
  1009. # logger.info("✓ CLIP model loaded successfully")
  1010. # return cls._clip_model, cls._clip_processor
  1011. # def center_crop(self, image: Image.Image, rel_crop: float = 0.7) -> Image.Image:
  1012. # """
  1013. # Center-crop to focus on the product area if there is too much background.
  1014. # Args:
  1015. # image: PIL Image
  1016. # rel_crop: Relative crop size (0.7 = 70% of min dimension)
  1017. # """
  1018. # w, h = image.size
  1019. # side = int(min(w, h) * rel_crop)
  1020. # left = (w - side) // 2
  1021. # top = (h - side) // 2
  1022. # return image.crop((left, top, left + side, top + side))
  1023. # def download_image(self, image_url: str, apply_crop: bool = False,
  1024. # max_size: Tuple[int, int] = (1024, 1024)) -> Optional[Image.Image]:
  1025. # """
  1026. # Download image from URL with optional preprocessing.
  1027. # Args:
  1028. # image_url: URL of the image
  1029. # apply_crop: Whether to apply center crop
  1030. # max_size: Maximum dimensions for resizing
  1031. # """
  1032. # try:
  1033. # response = requests.get(image_url, timeout=10)
  1034. # response.raise_for_status()
  1035. # image = Image.open(BytesIO(response.content)).convert('RGB')
  1036. # # Resize if too large
  1037. # if image.size[0] > max_size[0] or image.size[1] > max_size[1]:
  1038. # image.thumbnail(max_size, Image.Resampling.LANCZOS)
  1039. # # Optional center crop
  1040. # if apply_crop:
  1041. # image = self.center_crop(image, rel_crop=0.7)
  1042. # return image
  1043. # except Exception as e:
  1044. # logger.error(f"Error downloading image from {image_url}: {str(e)}")
  1045. # return None
  1046. # def normalize_color(self, word: str) -> str:
  1047. # """
  1048. # Enhanced color normalization with aliases and modifiers.
  1049. # Args:
  1050. # word: Color word to normalize
  1051. # """
  1052. # w = word.lower().strip()
  1053. # # Remove light/dark modifiers
  1054. # w = w.replace("light ", "").replace("dark ", "")
  1055. # w = w.replace("bright ", "").replace("pale ", "")
  1056. # # Alias mapping
  1057. # aliases = {
  1058. # "grey": "gray",
  1059. # "navy": "blue",
  1060. # "navy blue": "blue",
  1061. # "maroon": "red",
  1062. # "crimson": "red",
  1063. # "scarlet": "red",
  1064. # "teal": "green",
  1065. # "turquoise": "blue",
  1066. # "cyan": "blue",
  1067. # "indigo": "blue",
  1068. # "violet": "purple",
  1069. # "lavender": "purple",
  1070. # "magenta": "pink",
  1071. # "off white": "white",
  1072. # "off-white": "white",
  1073. # "cream": "beige",
  1074. # "ivory": "white",
  1075. # "khaki": "beige",
  1076. # "tan": "brown",
  1077. # "bronze": "brown",
  1078. # "gold": "yellow",
  1079. # "silver": "gray",
  1080. # "charcoal": "gray"
  1081. # }
  1082. # normalized = aliases.get(w, w)
  1083. # # Validate against canonical colors
  1084. # if normalized not in [c.lower() for c in self.COLORS]:
  1085. # # Try first word if it's a compound
  1086. # first_word = normalized.split()[0] if ' ' in normalized else normalized
  1087. # if first_word in [c.lower() for c in self.COLORS]:
  1088. # return first_word
  1089. # return normalized
  1090. # def extract_dominant_colors(self, image: Image.Image, n_colors: int = 3) -> List[Dict]:
  1091. # """Extract dominant colors using K-means clustering."""
  1092. # try:
  1093. # # Resize for faster processing
  1094. # img_small = image.resize((150, 150))
  1095. # img_array = np.array(img_small)
  1096. # pixels = img_array.reshape(-1, 3)
  1097. # # Sample if too many pixels
  1098. # if len(pixels) > 10000:
  1099. # indices = np.random.choice(len(pixels), 10000, replace=False)
  1100. # pixels = pixels[indices]
  1101. # # K-means clustering
  1102. # kmeans = KMeans(n_clusters=n_colors, random_state=42, n_init=5, max_iter=100)
  1103. # kmeans.fit(pixels)
  1104. # colors = []
  1105. # labels_counts = np.bincount(kmeans.labels_)
  1106. # for i, center in enumerate(kmeans.cluster_centers_):
  1107. # rgb = tuple(center.astype(int))
  1108. # color_name = self._get_color_name_simple(rgb)
  1109. # percentage = float(labels_counts[i] / len(kmeans.labels_) * 100)
  1110. # colors.append({
  1111. # "name": color_name,
  1112. # "rgb": rgb,
  1113. # "percentage": percentage
  1114. # })
  1115. # colors.sort(key=lambda x: x['percentage'], reverse=True)
  1116. # return colors
  1117. # except Exception as e:
  1118. # logger.error(f"Error extracting colors: {str(e)}")
  1119. # return []
  1120. # def _get_color_name_simple(self, rgb: Tuple[int, int, int]) -> str:
  1121. # """Simple RGB to color name mapping."""
  1122. # r, g, b = rgb
  1123. # # Define color ranges
  1124. # colors = {
  1125. # 'black': (r < 50 and g < 50 and b < 50),
  1126. # 'white': (r > 200 and g > 200 and b > 200),
  1127. # 'gray': (abs(r - g) < 30 and abs(g - b) < 30 and abs(r - b) < 30 and 50 <= r <= 200),
  1128. # 'red': (r > 150 and g < 100 and b < 100),
  1129. # 'green': (g > 150 and r < 100 and b < 100),
  1130. # 'blue': (b > 150 and r < 100 and g < 100),
  1131. # 'yellow': (r > 200 and g > 200 and b < 100),
  1132. # 'orange': (r > 200 and 100 < g < 200 and b < 100),
  1133. # 'purple': (r > 100 and b > 100 and g < 100),
  1134. # 'pink': (r > 200 and 100 < g < 200 and 100 < b < 200),
  1135. # 'brown': (50 < r < 150 and 30 < g < 100 and b < 80),
  1136. # 'beige': (150 < r < 220 and 140 < g < 200 and 100 < b < 180),
  1137. # }
  1138. # for color_name, condition in colors.items():
  1139. # if condition:
  1140. # return color_name
  1141. # # Fallback based on dominant channel
  1142. # if r > g and r > b:
  1143. # return 'red'
  1144. # elif g > r and g > b:
  1145. # return 'green'
  1146. # elif b > r and b > g:
  1147. # return 'blue'
  1148. # else:
  1149. # return 'gray'
  1150. # def classify_with_clip(
  1151. # self,
  1152. # image: Image.Image,
  1153. # candidates: List[str],
  1154. # attribute_name: str,
  1155. # confidence_threshold: Optional[float] = None
  1156. # ) -> Dict:
  1157. # """
  1158. # Use CLIP to classify image against candidate labels.
  1159. # Args:
  1160. # image: PIL Image
  1161. # candidates: List of text labels to classify against
  1162. # attribute_name: Name of the attribute being classified
  1163. # confidence_threshold: Override default threshold
  1164. # """
  1165. # try:
  1166. # model, processor = self._get_clip_model()
  1167. # device = self._get_device()
  1168. # # Use attribute-specific threshold if not provided
  1169. # if confidence_threshold is None:
  1170. # confidence_threshold = self.CONFIDENCE_THRESHOLDS.get(attribute_name, 0.20)
  1171. # # Prepare inputs
  1172. # inputs = processor(
  1173. # text=candidates,
  1174. # images=image,
  1175. # return_tensors="pt",
  1176. # padding=True
  1177. # )
  1178. # # Move to device
  1179. # inputs = {k: v.to(device) for k, v in inputs.items()}
  1180. # # Get predictions
  1181. # with torch.no_grad():
  1182. # outputs = model(**inputs)
  1183. # logits_per_image = outputs.logits_per_image
  1184. # probs = logits_per_image.softmax(dim=1).cpu()
  1185. # # Get top predictions
  1186. # top_k = min(3, len(candidates))
  1187. # top_probs, top_indices = torch.topk(probs[0], k=top_k)
  1188. # results = []
  1189. # for prob, idx in zip(top_probs, top_indices):
  1190. # if prob.item() > confidence_threshold:
  1191. # value = candidates[idx.item()]
  1192. # # Apply color normalization if color attribute
  1193. # if attribute_name == "color":
  1194. # value = self.normalize_color(value)
  1195. # results.append({
  1196. # "value": value,
  1197. # "confidence": float(prob.item())
  1198. # })
  1199. # return {
  1200. # "attribute": attribute_name,
  1201. # "predictions": results
  1202. # }
  1203. # except Exception as e:
  1204. # logger.error(f"Error in CLIP classification for {attribute_name}: {str(e)}")
  1205. # return {"attribute": attribute_name, "predictions": []}
  1206. # def detect_category_hierarchical(self, image: Image.Image) -> Tuple[str, str, float]:
  1207. # """
  1208. # Two-stage hierarchical product detection:
  1209. # 1. Detect broad category
  1210. # 2. Detect specific product within that category
  1211. # Returns:
  1212. # (category, product_type, confidence)
  1213. # """
  1214. # # Stage 1: Detect broad category
  1215. # category_names = list(self.CATEGORY_ATTRIBUTES.keys())
  1216. # category_labels = [f"a photo of {cat.replace('_', ' ')}" for cat in category_names]
  1217. # category_result = self.classify_with_clip(
  1218. # image, category_labels, "category_detection", confidence_threshold=0.15
  1219. # )
  1220. # if not category_result["predictions"]:
  1221. # return "unknown", "unknown", 0.0
  1222. # # Extract category
  1223. # best_category_match = category_result["predictions"][0]
  1224. # detected_category = category_names[category_labels.index(best_category_match["value"])]
  1225. # category_confidence = best_category_match["confidence"]
  1226. # # Stage 2: Detect specific product within category
  1227. # products_in_category = self.CATEGORY_ATTRIBUTES[detected_category]["products"]
  1228. # product_labels = [f"a photo of a {p}" for p in products_in_category]
  1229. # product_result = self.classify_with_clip(
  1230. # image, product_labels, "product_detection", confidence_threshold=0.15
  1231. # )
  1232. # if product_result["predictions"]:
  1233. # best_product = product_result["predictions"][0]
  1234. # product_type = products_in_category[product_labels.index(best_product["value"])]
  1235. # product_confidence = best_product["confidence"]
  1236. # # Combined confidence (geometric mean for balance)
  1237. # combined_confidence = (category_confidence * product_confidence) ** 0.5
  1238. # logger.info(f"Detected: {detected_category} → {product_type} (confidence: {combined_confidence:.3f})")
  1239. # return detected_category, product_type, combined_confidence
  1240. # return detected_category, "unknown", category_confidence * 0.5
  1241. # def detect_category_flat(self, image: Image.Image) -> Tuple[str, str, float]:
  1242. # """
  1243. # Single-stage flat product detection across all categories.
  1244. # Faster but potentially less accurate.
  1245. # Returns:
  1246. # (category, product_type, confidence)
  1247. # """
  1248. # # Collect all products with their categories
  1249. # all_products = []
  1250. # product_to_category = {}
  1251. # for category, data in self.CATEGORY_ATTRIBUTES.items():
  1252. # for product in data["products"]:
  1253. # label = f"a photo of a {product}"
  1254. # all_products.append(label)
  1255. # product_to_category[label] = category
  1256. # # Classify
  1257. # result = self.classify_with_clip(
  1258. # image, all_products, "product_detection", confidence_threshold=0.15
  1259. # )
  1260. # if result["predictions"]:
  1261. # best_match = result["predictions"][0]
  1262. # product_label = best_match["value"]
  1263. # category = product_to_category[product_label]
  1264. # product_type = product_label.replace("a photo of a ", "")
  1265. # confidence = best_match["confidence"]
  1266. # logger.info(f"Detected: {category} → {product_type} (confidence: {confidence:.3f})")
  1267. # return category, product_type, confidence
  1268. # return "unknown", "unknown", 0.0
  1269. # def process_image(
  1270. # self,
  1271. # image_url: str,
  1272. # product_type_hint: Optional[str] = None,
  1273. # apply_crop: bool = False,
  1274. # detection_mode: str = "hierarchical"
  1275. # ) -> Dict:
  1276. # """
  1277. # Main method to process image and extract visual attributes.
  1278. # Args:
  1279. # image_url: URL of the product image
  1280. # product_type_hint: Optional hint about product type
  1281. # apply_crop: Whether to apply center crop for better focus
  1282. # detection_mode: "hierarchical" (slower, more accurate) or "flat" (faster)
  1283. # """
  1284. # import time
  1285. # start_time = time.time()
  1286. # try:
  1287. # # Download image
  1288. # image = self.download_image(image_url, apply_crop=apply_crop)
  1289. # if image is None:
  1290. # return {
  1291. # "visual_attributes": {},
  1292. # "error": "Failed to download image"
  1293. # }
  1294. # visual_attributes = {}
  1295. # detailed_predictions = {}
  1296. # # Step 1: Detect product category and type
  1297. # if detection_mode == "hierarchical":
  1298. # detected_category, detected_product_type, category_confidence = \
  1299. # self.detect_category_hierarchical(image)
  1300. # else:
  1301. # detected_category, detected_product_type, category_confidence = \
  1302. # self.detect_category_flat(image)
  1303. # # If confidence is too low, return minimal info
  1304. # if category_confidence < 0.12:
  1305. # logger.warning(f"Low confidence ({category_confidence:.3f}). Returning basic attributes only.")
  1306. # colors = self.extract_dominant_colors(image, n_colors=3)
  1307. # if colors:
  1308. # visual_attributes["primary_color"] = colors[0]["name"]
  1309. # visual_attributes["color_palette"] = [c["name"] for c in colors]
  1310. # return {
  1311. # "visual_attributes": visual_attributes,
  1312. # "category_confidence": category_confidence,
  1313. # "processing_time": round(time.time() - start_time, 2),
  1314. # "warning": "Low confidence detection"
  1315. # }
  1316. # # Add detected information
  1317. # visual_attributes["product_type"] = detected_product_type
  1318. # visual_attributes["category"] = detected_category
  1319. # visual_attributes["detection_confidence"] = round(category_confidence, 3)
  1320. # # Step 2: Extract universal color attribute
  1321. # colors = self.extract_dominant_colors(image, n_colors=3)
  1322. # if colors:
  1323. # visual_attributes["primary_color"] = colors[0]["name"]
  1324. # visual_attributes["color_palette"] = [c["name"] for c in colors]
  1325. # visual_attributes["color_distribution"] = [
  1326. # {"name": c["name"], "percentage": round(c["percentage"], 1)}
  1327. # for c in colors
  1328. # ]
  1329. # # Step 3: Extract category-specific attributes
  1330. # if detected_category in self.CATEGORY_ATTRIBUTES:
  1331. # category_config = self.CATEGORY_ATTRIBUTES[detected_category]
  1332. # for attr_name, attr_values in category_config["attributes"].items():
  1333. # # Skip color since we already extracted it
  1334. # if attr_name == "color":
  1335. # continue
  1336. # # Get attribute-specific threshold
  1337. # threshold = self.CONFIDENCE_THRESHOLDS.get(attr_name, 0.20)
  1338. # # Classify
  1339. # result = self.classify_with_clip(
  1340. # image, attr_values, attr_name, confidence_threshold=threshold
  1341. # )
  1342. # detailed_predictions[attr_name] = result
  1343. # # Only add if confidence is reasonable
  1344. # if result["predictions"]:
  1345. # best_prediction = result["predictions"][0]
  1346. # if best_prediction["confidence"] > threshold:
  1347. # visual_attributes[attr_name] = best_prediction["value"]
  1348. # processing_time = time.time() - start_time
  1349. # return {
  1350. # "visual_attributes": visual_attributes,
  1351. # "detailed_predictions": detailed_predictions,
  1352. # "detection_confidence": round(category_confidence, 3),
  1353. # "processing_time": round(processing_time, 2),
  1354. # "metadata": {
  1355. # "detection_mode": detection_mode,
  1356. # "crop_applied": apply_crop,
  1357. # "image_size": image.size
  1358. # }
  1359. # }
  1360. # except Exception as e:
  1361. # logger.error(f"Error processing image: {str(e)}")
  1362. # import traceback
  1363. # traceback.print_exc()
  1364. # return {
  1365. # "visual_attributes": {},
  1366. # "error": str(e),
  1367. # "processing_time": round(time.time() - start_time, 2)
  1368. # }
  1369. # def batch_process_images(
  1370. # self,
  1371. # image_urls: List[str],
  1372. # detection_mode: str = "flat"
  1373. # ) -> List[Dict]:
  1374. # """
  1375. # Process multiple images in batch.
  1376. # Args:
  1377. # image_urls: List of image URLs
  1378. # detection_mode: Detection mode to use
  1379. # """
  1380. # results = []
  1381. # for i, url in enumerate(image_urls):
  1382. # logger.info(f"Processing image {i+1}/{len(image_urls)}: {url}")
  1383. # result = self.process_image(url, detection_mode=detection_mode)
  1384. # results.append(result)
  1385. # return results
  1386. # @classmethod
  1387. # def cleanup_models(cls):
  1388. # """Free up memory by unloading models."""
  1389. # if cls._clip_model is not None:
  1390. # del cls._clip_model
  1391. # del cls._clip_processor
  1392. # cls._clip_model = None
  1393. # cls._clip_processor = None
  1394. # if torch.cuda.is_available():
  1395. # torch.cuda.empty_cache()
  1396. # logger.info("Models unloaded and memory freed")
  1397. # def get_supported_categories(self) -> List[str]:
  1398. # """Get list of all supported product categories."""
  1399. # return list(self.CATEGORY_ATTRIBUTES.keys())
  1400. # def get_category_products(self, category: str) -> List[str]:
  1401. # """Get list of products in a specific category."""
  1402. # return self.CATEGORY_ATTRIBUTES.get(category, {}).get("products", [])
  1403. # def get_category_attributes(self, category: str) -> Dict[str, List[str]]:
  1404. # """Get attribute schema for a specific category."""
  1405. # return self.CATEGORY_ATTRIBUTES.get(category, {}).get("attributes", {})
  1406. # def get_statistics(self) -> Dict:
  1407. # """Get statistics about the taxonomy."""
  1408. # total_products = sum(
  1409. # len(data["products"])
  1410. # for data in self.CATEGORY_ATTRIBUTES.values()
  1411. # )
  1412. # total_attributes = sum(
  1413. # len(data["attributes"])
  1414. # for data in self.CATEGORY_ATTRIBUTES.values()
  1415. # )
  1416. # return {
  1417. # "total_categories": len(self.CATEGORY_ATTRIBUTES),
  1418. # "total_products": total_products,
  1419. # "total_unique_attributes": len(set(
  1420. # attr
  1421. # for data in self.CATEGORY_ATTRIBUTES.values()
  1422. # for attr in data["attributes"].keys()
  1423. # )),
  1424. # "categories": list(self.CATEGORY_ATTRIBUTES.keys())
  1425. # }
  1426. # # ==================== USAGE EXAMPLES ====================
  1427. # def example_basic_usage():
  1428. # """Basic usage example."""
  1429. # print("=== Basic Usage Example ===\n")
  1430. # # Initialize service
  1431. # service = VisualProcessingService()
  1432. # # Process single image (hierarchical mode - more accurate)
  1433. # result = service.process_image(
  1434. # "https://example.com/product.jpg",
  1435. # detection_mode="hierarchical"
  1436. # )
  1437. # print("Product Type:", result["visual_attributes"].get("product_type"))
  1438. # print("Category:", result["visual_attributes"].get("category"))
  1439. # print("Primary Color:", result["visual_attributes"].get("primary_color"))
  1440. # print("Detection Confidence:", result.get("detection_confidence"))
  1441. # print("Processing Time:", result["processing_time"], "seconds")
  1442. # print("\nAll Attributes:")
  1443. # for key, value in result["visual_attributes"].items():
  1444. # print(f" {key}: {value}")
  1445. # def example_fast_mode():
  1446. # """Fast processing mode example."""
  1447. # print("\n=== Fast Mode Example ===\n")
  1448. # service = VisualProcessingService()
  1449. # # Fast mode (flat detection)
  1450. # result = service.process_image(
  1451. # "https://example.com/product.jpg",
  1452. # detection_mode="flat" # Faster, single-stage detection
  1453. # )
  1454. # print("Processing Time:", result["processing_time"], "seconds")
  1455. # print("Detected:", result["visual_attributes"])
  1456. # def example_with_cropping():
  1457. # """Example with center cropping for busy backgrounds."""
  1458. # print("\n=== With Center Cropping ===\n")
  1459. # service = VisualProcessingService()
  1460. # # Apply center crop to focus on product
  1461. # result = service.process_image(
  1462. # "https://example.com/product-with-background.jpg",
  1463. # apply_crop=True, # Enable center cropping
  1464. # detection_mode="hierarchical"
  1465. # )
  1466. # print("Crop Applied:", result["metadata"]["crop_applied"])
  1467. # print("Detected:", result["visual_attributes"])
  1468. # def example_batch_processing():
  1469. # """Batch processing example."""
  1470. # print("\n=== Batch Processing ===\n")
  1471. # service = VisualProcessingService()
  1472. # image_urls = [
  1473. # "https://example.com/product1.jpg",
  1474. # "https://example.com/product2.jpg",
  1475. # "https://example.com/product3.jpg"
  1476. # ]
  1477. # results = service.batch_process_images(image_urls, detection_mode="flat")
  1478. # for i, result in enumerate(results):
  1479. # print(f"\nProduct {i+1}:")
  1480. # print(f" Type: {result['visual_attributes'].get('product_type')}")
  1481. # print(f" Category: {result['visual_attributes'].get('category')}")
  1482. # print(f" Time: {result['processing_time']}s")
  1483. # def example_category_info():
  1484. # """Get information about supported categories."""
  1485. # print("\n=== Category Information ===\n")
  1486. # service = VisualProcessingService()
  1487. # # Get statistics
  1488. # stats = service.get_statistics()
  1489. # print("Statistics:")
  1490. # print(f" Total Categories: {stats['total_categories']}")
  1491. # print(f" Total Products: {stats['total_products']}")
  1492. # print(f" Unique Attributes: {stats['total_unique_attributes']}")
  1493. # # Get all categories
  1494. # categories = service.get_supported_categories()
  1495. # print(f"\nSupported Categories ({len(categories)}):")
  1496. # for cat in categories:
  1497. # products = service.get_category_products(cat)
  1498. # print(f" {cat}: {len(products)} products")
  1499. # # Get attributes for a specific category
  1500. # print("\nClothing Category Attributes:")
  1501. # clothing_attrs = service.get_category_attributes("clothing")
  1502. # for attr, values in clothing_attrs.items():
  1503. # print(f" {attr}: {len(values)} options")
  1504. # def example_detailed_predictions():
  1505. # """Example showing detailed predictions with confidence scores."""
  1506. # print("\n=== Detailed Predictions ===\n")
  1507. # service = VisualProcessingService()
  1508. # result = service.process_image(
  1509. # "https://example.com/product.jpg",
  1510. # detection_mode="hierarchical"
  1511. # )
  1512. # print("Visual Attributes (Best Predictions):")
  1513. # for key, value in result["visual_attributes"].items():
  1514. # print(f" {key}: {value}")
  1515. # print("\nDetailed Predictions (Top 3 for each attribute):")
  1516. # for attr_name, predictions in result.get("detailed_predictions", {}).items():
  1517. # print(f"\n {attr_name}:")
  1518. # for pred in predictions.get("predictions", []):
  1519. # print(f" - {pred['value']}: {pred['confidence']:.3f}")
  1520. # def example_color_distribution():
  1521. # """Example showing color palette extraction."""
  1522. # print("\n=== Color Distribution ===\n")
  1523. # service = VisualProcessingService()
  1524. # result = service.process_image("https://example.com/product.jpg")
  1525. # print("Primary Color:", result["visual_attributes"].get("primary_color"))
  1526. # print("\nColor Palette:")
  1527. # for color in result["visual_attributes"].get("color_palette", []):
  1528. # print(f" - {color}")
  1529. # print("\nColor Distribution:")
  1530. # for color_info in result["visual_attributes"].get("color_distribution", []):
  1531. # print(f" {color_info['name']}: {color_info['percentage']}%")
  1532. # def example_error_handling():
  1533. # """Example showing error handling."""
  1534. # print("\n=== Error Handling ===\n")
  1535. # service = VisualProcessingService()
  1536. # # Invalid URL
  1537. # result = service.process_image("https://invalid-url.com/nonexistent.jpg")
  1538. # if "error" in result:
  1539. # print("Error occurred:", result["error"])
  1540. # else:
  1541. # print("Processing successful")
  1542. # # Low confidence warning
  1543. # result = service.process_image("https://example.com/ambiguous-product.jpg")
  1544. # if "warning" in result:
  1545. # print("Warning:", result["warning"])
  1546. # print("Confidence:", result.get("category_confidence"))
  1547. # def example_cleanup():
  1548. # """Example showing model cleanup."""
  1549. # print("\n=== Model Cleanup ===\n")
  1550. # service = VisualProcessingService()
  1551. # # Process some images
  1552. # result = service.process_image("https://example.com/product.jpg")
  1553. # print("Processed successfully")
  1554. # # Clean up models when done (frees memory)
  1555. # VisualProcessingService.cleanup_models()
  1556. # print("Models cleaned up and memory freed")
  1557. # # ==================== PRODUCTION USAGE ====================
  1558. # def production_example():
  1559. # """
  1560. # Production-ready example with proper error handling and logging.
  1561. # """
  1562. # import logging
  1563. # # Setup logging
  1564. # logging.basicConfig(
  1565. # level=logging.INFO,
  1566. # format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
  1567. # )
  1568. # service = VisualProcessingService()
  1569. # def process_product_image(image_url: str, product_id: str) -> Dict:
  1570. # """
  1571. # Process a product image with full error handling.
  1572. # """
  1573. # try:
  1574. # # Process with hierarchical mode for best accuracy
  1575. # result = service.process_image(
  1576. # image_url,
  1577. # detection_mode="hierarchical",
  1578. # apply_crop=False # Set True if images have busy backgrounds
  1579. # )
  1580. # # Check for errors
  1581. # if "error" in result:
  1582. # logger.error(f"Failed to process {product_id}: {result['error']}")
  1583. # return {
  1584. # "product_id": product_id,
  1585. # "status": "error",
  1586. # "error": result["error"]
  1587. # }
  1588. # # Check confidence
  1589. # confidence = result.get("detection_confidence", 0)
  1590. # if confidence < 0.15:
  1591. # logger.warning(f"Low confidence for {product_id}: {confidence}")
  1592. # return {
  1593. # "product_id": product_id,
  1594. # "status": "low_confidence",
  1595. # "confidence": confidence,
  1596. # "partial_attributes": result["visual_attributes"]
  1597. # }
  1598. # # Success
  1599. # return {
  1600. # "product_id": product_id,
  1601. # "status": "success",
  1602. # "attributes": result["visual_attributes"],
  1603. # "confidence": confidence,
  1604. # "processing_time": result["processing_time"]
  1605. # }
  1606. # except Exception as e:
  1607. # logger.exception(f"Unexpected error processing {product_id}")
  1608. # return {
  1609. # "product_id": product_id,
  1610. # "status": "exception",
  1611. # "error": str(e)
  1612. # }
  1613. # # Process products
  1614. # products = [
  1615. # {"id": "PROD001", "image_url": "https://example.com/tshirt.jpg"},
  1616. # {"id": "PROD002", "image_url": "https://example.com/laptop.jpg"},
  1617. # {"id": "PROD003", "image_url": "https://example.com/chair.jpg"}
  1618. # ]
  1619. # results = []
  1620. # for product in products:
  1621. # result = process_product_image(product["image_url"], product["id"])
  1622. # results.append(result)
  1623. # # Print summary
  1624. # if result["status"] == "success":
  1625. # attrs = result["attributes"]
  1626. # print(f"\n✓ {product['id']} ({result['processing_time']}s):")
  1627. # print(f" Type: {attrs.get('product_type')}")
  1628. # print(f" Category: {attrs.get('category')}")
  1629. # print(f" Color: {attrs.get('primary_color')}")
  1630. # else:
  1631. # print(f"\n✗ {product['id']}: {result['status']}")
  1632. # return results
  1633. # # ==================== MAIN ====================
  1634. # if __name__ == "__main__":
  1635. # # Run examples
  1636. # print("Enhanced Visual Processing Service")
  1637. # print("=" * 60)
  1638. # # Show statistics
  1639. # service = VisualProcessingService()
  1640. # stats = service.get_statistics()
  1641. # print(f"\nTaxonomy Coverage:")
  1642. # print(f" Categories: {stats['total_categories']}")
  1643. # print(f" Products: {stats['total_products']}")
  1644. # print(f" Attributes: {stats['total_unique_attributes']}")
  1645. # print("\n" + "=" * 60)
  1646. # print("Run individual examples by calling the example functions:")
  1647. # print(" - example_basic_usage()")
  1648. # print(" - example_fast_mode()")
  1649. # print(" - example_with_cropping()")
  1650. # print(" - example_batch_processing()")
  1651. # print(" - example_category_info()")
  1652. # print(" - example_detailed_predictions()")
  1653. # print(" - example_color_distribution()")
  1654. # print(" - production_example()")
  1655. # print("=" * 60)