
cleaning files

Harshit Pathak 3 months ago
Parent
Current commit
e3bba06e75
2 files changed, 59 insertions and 2,816 deletions
  1. attr_extraction/services.py (+0, -2300)
  2. attr_extraction/views.py (+59, -516)

attr_extraction/services.py (+0, -2300)

@@ -1,2303 +1,3 @@
-
-
-
-# # # ==================== services.py ====================
-# # import requests
-# # import json
-# # import re
-# # from typing import Dict, List, Optional, Tuple
-# # from django.conf import settings
-# # from concurrent.futures import ThreadPoolExecutor, as_completed
-# # from sentence_transformers import SentenceTransformer, util
-# # import numpy as np
-# # from .ocr_service import OCRService
-
-
-# # # Initialize embedding model for normalization
-# # model_embedder = SentenceTransformer("all-MiniLM-L6-v2")
-
-
-# # class ProductAttributeService:
-# #     """Service class for extracting product attributes using Groq LLM."""
-
-# #     @staticmethod
-# #     def normalize_dimension_text(text: str) -> str:
-# #         """
-# #         Normalize dimension text to match format like '16x20', '20x30', etc.
-# #         Handles formats like '16 x 20', '16x1.5x20', '16 x 1.5 x 20 Inches'
-# #         Returns the normalized dimension (e.g., '16x20') or empty string if not found.
-# #         """
-# #         if not text:
-# #             return ""
-        
-# #         # Convert to lowercase and remove common units
-# #         text = text.lower()
-# #         text = re.sub(r'\s*(inches|inch|in|cm|centimeters|mm|millimeters)\s*', '', text, flags=re.IGNORECASE)
-        
-# #         # Extract all numbers from the text
-# #         numbers = re.findall(r'\d+\.?\d*', text)
-        
-# #         if not numbers:
-# #             return ""
-        
-# #         # Convert to floats first to handle decimals properly
-# #         float_numbers = []
-# #         for num in numbers:
-# #             try:
-# #                 float_numbers.append(float(num))
-# #             except ValueError:
-# #                 continue
-        
-# #         if len(float_numbers) < 2:
-# #             return ""
-        
-# #         # If we have 3 dimensions, it's likely Width x Depth x Height
-# #         # For wall art, depth is usually small (< 5), so we keep first and last
-# #         if len(float_numbers) == 3:
-# #             # Keep first and last values (width and height), skip middle (depth)
-# #             float_numbers = [float_numbers[0], float_numbers[2]]
-# #         elif len(float_numbers) > 3:
-# #             # If more than 3 dimensions, keep the two largest
-# #             float_numbers = sorted(float_numbers)[-2:]
-# #         else:
-# #             # Just 2 dimensions, use as is
-# #             float_numbers = float_numbers[:2]
-        
-# #         # Format numbers: use integer if whole, else one decimal
-# #         formatted_numbers = []
-# #         for num in float_numbers:
-# #             if num.is_integer():
-# #                 formatted_numbers.append(str(int(num)))
-# #             else:
-# #                 formatted_numbers.append(f"{num:.1f}")
-        
-# #         # Sort to ensure consistent order (smaller x larger)
-# #         formatted_numbers.sort(key=lambda x: float(x))
-        
-# #         # Return formatted dimension
-# #         return f"{formatted_numbers[0]}x{formatted_numbers[1]}"
-
-    
-    
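For reference, a minimal standalone sketch of the dimension normalizer removed above (standard library only; the asserts illustrate the intended behaviour):

import re

def normalize_dims(text: str) -> str:
    # Strip units, pull out the numbers, drop the middle value of a
    # W x D x H triple, and return "small x large" - as in the code above.
    text = re.sub(r'\s*(inches|inch|in|cm|centimeters|mm|millimeters)\s*', '', text.lower())
    nums = [float(n) for n in re.findall(r'\d+\.?\d*', text)]
    if len(nums) < 2:
        return ""
    if len(nums) == 3:
        nums = [nums[0], nums[2]]      # treat the middle number as depth
    elif len(nums) > 3:
        nums = sorted(nums)[-2:]       # keep the two largest
    fmt = sorted((str(int(n)) if n.is_integer() else f"{n:.1f}" for n in nums[:2]), key=float)
    return f"{fmt[0]}x{fmt[1]}"

assert normalize_dims("16 x 1.5 x 20 Inches") == "16x20"
assert normalize_dims("20 x 30 cm") == "20x30"
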
-# #     @staticmethod
-# #     def normalize_value_for_matching(value: str, attr_name: str = "") -> str:
-# #         """
-# #         Normalize a value based on its attribute type for better matching.
-# #         Currently handles dimensions specially, can be extended for other attributes.
-# #         """
-# #         # Check if this is a dimension-related attribute
-# #         dimension_keywords = ['dimension', 'size', 'measurement']
-# #         if any(keyword in attr_name.lower() for keyword in dimension_keywords):
-# #             normalized = ProductAttributeService.normalize_dimension_text(value)
-# #             if normalized:
-# #                 return normalized
-        
-# #         # For other attributes, just return cleaned value
-# #         return value.strip()
-
-# #     @staticmethod
-# #     def combine_product_text(
-# #         title: Optional[str] = None,
-# #         short_desc: Optional[str] = None,
-# #         long_desc: Optional[str] = None,
-# #         ocr_text: Optional[str] = None
-# #     ) -> Tuple[str, Dict[str, str]]:
-# #         """
-# #         Combine product metadata into a single text block.
-# #         Returns: (combined_text, source_map) where source_map tracks which text came from where
-# #         """
-# #         parts = []
-# #         source_map = {}
-        
-# #         if title:
-# #             title_str = str(title).strip()
-# #             parts.append(f"Title: {title_str}")
-# #             source_map['title'] = title_str
-# #         if short_desc:
-# #             short_str = str(short_desc).strip()
-# #             parts.append(f"Description: {short_str}")
-# #             source_map['short_desc'] = short_str
-# #         if long_desc:
-# #             long_str = str(long_desc).strip()
-# #             parts.append(f"Details: {long_str}")
-# #             source_map['long_desc'] = long_str
-# #         if ocr_text:
-# #             parts.append(f"OCR Text: {ocr_text}")
-# #             source_map['ocr_text'] = ocr_text
-        
-# #         combined = "\n".join(parts).strip()
-        
-# #         if not combined:
-# #             return "No product information available", {}
-        
-# #         return combined, source_map
-
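A condensed, runnable sketch of the same combine step, showing the (text, source_map) pair it produces (sample inputs are illustrative):

def combine(title=None, short_desc=None, long_desc=None, ocr_text=None):
    # Mirrors combine_product_text above: label each part, record its origin.
    fields = [("title", "Title", title), ("short_desc", "Description", short_desc),
              ("long_desc", "Details", long_desc), ("ocr_text", "OCR Text", ocr_text)]
    parts, source_map = [], {}
    for key, label, value in fields:
        if value:
            value = str(value).strip()
            parts.append(f"{label}: {value}")
            source_map[key] = value
    return ("\n".join(parts) or "No product information available"), source_map

text, srcs = combine(title="Sunset Canvas Print 16x20",
                     short_desc="Wall art for the living room")
assert srcs == {"title": "Sunset Canvas Print 16x20",
                "short_desc": "Wall art for the living room"}
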
-# #     @staticmethod
-# #     def find_value_source(value: str, source_map: Dict[str, str], attr_name: str = "") -> str:
-# #         """
-# #         Find which source(s) contain the given value.
-# #         Returns the source name(s) where the value appears.
-# #         Now handles normalized matching for dimensions.
-# #         """
-# #         value_lower = value.lower()
-# #         # Split value into tokens for better matching
-# #         value_tokens = set(value_lower.replace("-", " ").replace("x", " ").split())
-        
-# #         # Check if this is a dimension-related attribute
-# #         is_dimension_attr = any(keyword in attr_name.lower() for keyword in ['dimension', 'size', 'measurement'])
-        
-# #         sources_found = []
-# #         source_scores = {}
-        
-# #         for source_name, source_text in source_map.items():
-# #             source_lower = source_text.lower()
-            
-# #             # Check for exact phrase match first
-# #             if value_lower in source_lower:
-# #                 source_scores[source_name] = 1.0
-# #                 continue
-            
-# #             # For dimensions, check normalized match
-# #             if is_dimension_attr:
-# #                 # Normalize the value (e.g., "16x20" stays "16x20")
-# #                 normalized_value = ProductAttributeService.normalize_dimension_text(value)
-# #                 if not normalized_value:
-# #                     normalized_value = value.replace("x", " ").strip()
-                
-# #                 # Normalize the source text to extract dimensions
-# #                 normalized_source = ProductAttributeService.normalize_dimension_text(source_text)
-                
-# #                 # Direct match
-# #                 if normalized_value == normalized_source:
-# #                     source_scores[source_name] = 0.95
-# #                     continue
-                
-# #                 # Also check if the dimension numbers appear in the source
-# #                 # Extract dimension parts (e.g., "16x20" -> ["16", "20"])
-# #                 dim_parts = normalized_value.split("x") if "x" in normalized_value else []
-# #                 if len(dim_parts) == 2:
-# #                     # Check if both numbers appear in the source
-# #                     if all(part in source_text for part in dim_parts):
-# #                         source_scores[source_name] = 0.85
-# #                         continue
-            
-# #             # Check for token matches
-# #             token_matches = sum(1 for token in value_tokens if token and token in source_lower)
-# #             if token_matches > 0 and len(value_tokens) > 0:
-# #                 source_scores[source_name] = token_matches / len(value_tokens)
-        
-# #         # Return source with highest score, or all sources if multiple have same score
-# #         if source_scores:
-# #             max_score = max(source_scores.values())
-# #             sources_found = [s for s, score in source_scores.items() if score == max_score]
-            
-# #             # Prioritize: title > short_desc > long_desc > ocr_text
-# #             priority = ['title', 'short_desc', 'long_desc', 'ocr_text']
-# #             for p in priority:
-# #                 if p in sources_found:
-# #                     return p
-            
-# #             return sources_found[0] if sources_found else "Not found"
-        
-# #         return "Not found"
-
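When several sources score equally, the tie is broken by the fixed priority order; e.g.:

# Tie-break used in find_value_source above:
priority = ['title', 'short_desc', 'long_desc', 'ocr_text']
sources_found = ['long_desc', 'title']   # both matched with the same score
best = next((p for p in priority if p in sources_found), "Not found")
assert best == 'title'
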
-# #     @staticmethod
-# #     def format_visual_attributes(visual_attributes: Dict) -> Dict:
-# #         """
-# #         Convert visual attributes to array format with source tracking.
-# #         Source is always 'image' for visual attributes.
-# #         """
-# #         formatted = {}
-        
-# #         for key, value in visual_attributes.items():
-# #             if isinstance(value, list):
-# #                 # Already a list (like color_palette)
-# #                 formatted[key] = [{"value": str(item), "source": "image"} for item in value]
-# #             elif isinstance(value, dict):
-# #                 # Nested dictionary - format recursively
-# #                 nested_formatted = {}
-# #                 for nested_key, nested_value in value.items():
-# #                     if isinstance(nested_value, list):
-# #                         nested_formatted[nested_key] = [{"value": str(item), "source": "image"} for item in nested_value]
-# #                     else:
-# #                         nested_formatted[nested_key] = [{"value": str(nested_value), "source": "image"}]
-# #                 formatted[key] = nested_formatted
-# #             else:
-# #                 # Single value
-# #                 formatted[key] = [{"value": str(value), "source": "image"}]
-        
-# #         return formatted
-
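Illustrative input/output for the conversion above (nested dicts omitted): scalars and lists alike end up as arrays of {"value", "source"} entries with source fixed to "image".

visual = {"dominant_color": "teal", "color_palette": ["teal", "sand"]}
formatted = {
    k: [{"value": str(x), "source": "image"} for x in (v if isinstance(v, list) else [v])]
    for k, v in visual.items()
}
assert formatted["dominant_color"] == [{"value": "teal", "source": "image"}]
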
-# #     @staticmethod
-# #     def extract_attributes_from_ocr(ocr_results: Dict, model: str = None) -> Dict:
-# #         """Extract structured attributes from OCR text using LLM."""
-# #         if model is None:
-# #             model = settings.SUPPORTED_MODELS[0]
-        
-# #         detected_text = ocr_results.get('detected_text', [])
-# #         if not detected_text:
-# #             return {}
-        
-# #         # Format OCR text for prompt
-# #         ocr_text = "\n".join([f"Text: {item['text']}, Confidence: {item['confidence']:.2f}" 
-# #                               for item in detected_text])
-        
-# #         prompt = f"""
-# # You are an AI model that extracts structured attributes from OCR text detected on product images.
-# # Given the OCR detections below, infer the possible product attributes and return them as a clean JSON object.
-
-# # OCR Text:
-# # {ocr_text}
-
-# # Extract relevant attributes like:
-# # - brand
-# # - model_number
-# # - size (waist_size, length, etc.)
-# # - collection
-# # - any other relevant product information
-
-# # Return a JSON object with only the attributes you can confidently identify.
-# # If an attribute is not present, do not include it in the response.
-# # """
-        
-# #         payload = {
-# #             "model": model,
-# #             "messages": [
-# #                 {
-# #                     "role": "system",
-# #                     "content": "You are a helpful AI that extracts structured data from OCR output. Return only valid JSON."
-# #                 },
-# #                 {"role": "user", "content": prompt}
-# #             ],
-# #             "temperature": 0.2,
-# #             "max_tokens": 500
-# #         }
-        
-# #         headers = {
-# #             "Authorization": f"Bearer {settings.GROQ_API_KEY}",
-# #             "Content-Type": "application/json",
-# #         }
-        
-# #         try:
-# #             response = requests.post(
-# #                 settings.GROQ_API_URL,
-# #                 headers=headers,
-# #                 json=payload,
-# #                 timeout=30
-# #             )
-# #             response.raise_for_status()
-# #             result_text = response.json()["choices"][0]["message"]["content"].strip()
-            
-# #             # Clean and parse JSON
-# #             result_text = ProductAttributeService._clean_json_response(result_text)
-# #             parsed = json.loads(result_text)
-            
-# #             # Convert to array format with source tracking
-# #             formatted_attributes = {}
-# #             for key, value in parsed.items():
-# #                 if key == "error":
-# #                     continue
-                
-# #                 # Handle nested dictionaries (like size)
-# #                 if isinstance(value, dict):
-# #                     nested_formatted = {}
-# #                     for nested_key, nested_value in value.items():
-# #                         nested_formatted[nested_key] = [{"value": str(nested_value), "source": "image"}]
-# #                     formatted_attributes[key] = nested_formatted
-# #                 elif isinstance(value, list):
-# #                     # Already a list, convert each item
-# #                     formatted_attributes[key] = [{"value": str(item), "source": "image"} for item in value]
-# #                 else:
-# #                     # Single value
-# #                     formatted_attributes[key] = [{"value": str(value), "source": "image"}]
-            
-# #             return formatted_attributes
-# #         except Exception as e:
-# #             return {"error": f"Failed to extract attributes from OCR: {str(e)}"}
-
-# #     @staticmethod
-# #     def calculate_attribute_relationships(
-# #         mandatory_attrs: Dict[str, List[str]],
-# #         product_text: str
-# #     ) -> Dict[str, float]:
-# #         """
-# #         Calculate semantic relationships between attribute values across different attributes.
-# #         Returns a matrix of cross-attribute value similarities.
-# #         """
-# #         pt_emb = model_embedder.encode(product_text, convert_to_tensor=True)
-
-# #         # Calculate similarities between all attribute values and product text
-# #         attr_scores = {}
-# #         for attr, values in mandatory_attrs.items():
-# #             attr_scores[attr] = {}
-# #             for val in values:
-# #                 contexts = [val, f"for {val}", f"use in {val}", f"suitable for {val}"]
-# #                 ctx_embs = [model_embedder.encode(c, convert_to_tensor=True) for c in contexts]
-# #                 sem_sim = max(float(util.cos_sim(pt_emb, ce).item()) for ce in ctx_embs)
-# #                 attr_scores[attr][val] = sem_sim
-
-# #         # Calculate cross-attribute value relationships
-# #         relationships = {}
-# #         attr_list = list(mandatory_attrs.keys())
-
-# #         for i, attr1 in enumerate(attr_list):
-# #             for attr2 in attr_list[i+1:]:
-# #                 # Calculate pairwise similarities between values of different attributes
-# #                 for val1 in mandatory_attrs[attr1]:
-# #                     for val2 in mandatory_attrs[attr2]:
-# #                         emb1 = model_embedder.encode(val1, convert_to_tensor=True)
-# #                         emb2 = model_embedder.encode(val2, convert_to_tensor=True)
-# #                         sim = float(util.cos_sim(emb1, emb2).item())
-
-# #                         # Store bidirectional relationships
-# #                         key1 = f"{attr1}:{val1}->{attr2}:{val2}"
-# #                         key2 = f"{attr2}:{val2}->{attr1}:{val1}"
-# #                         relationships[key1] = sim
-# #                         relationships[key2] = sim
-
-# #         return relationships
-
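Each directed pair key stores a plain cosine similarity between the two value embeddings; a two-value sketch (requires sentence-transformers, as imported above; "Furniture" is an invented attribute name):

from sentence_transformers import SentenceTransformer, util

model = SentenceTransformer("all-MiniLM-L6-v2")
emb1 = model.encode("Living Room", convert_to_tensor=True)
emb2 = model.encode("Sofa", convert_to_tensor=True)
sim = float(util.cos_sim(emb1, emb2).item())
# Stored under both directions, e.g. "Room:Living Room->Furniture:Sofa"
# and "Furniture:Sofa->Room:Living Room".
print(sim)
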
-# #     @staticmethod
-# #     def calculate_value_clusters(
-# #         values: List[str],
-# #         scores: List[Tuple[str, float]],
-# #         cluster_threshold: float = 0.4
-# #     ) -> List[List[str]]:
-# #         """
-# #         Group values into semantic clusters based on their similarity to each other.
-# #         Returns clusters of related values.
-# #         """
-# #         if len(values) <= 1:
-# #             return [[val] for val, _ in scores]
-
-# #         # Get embeddings for all values
-# #         embeddings = [model_embedder.encode(val, convert_to_tensor=True) for val in values]
-
-# #         # Calculate pairwise similarities
-# #         similarity_matrix = np.zeros((len(values), len(values)))
-# #         for i in range(len(values)):
-# #             for j in range(i+1, len(values)):
-# #                 sim = float(util.cos_sim(embeddings[i], embeddings[j]).item())
-# #                 similarity_matrix[i][j] = sim
-# #                 similarity_matrix[j][i] = sim
-
-# #         # Simple clustering: group values with high similarity
-# #         clusters = []
-# #         visited = set()
-
-# #         for i, (val, score) in enumerate(scores):
-# #             if i in visited:
-# #                 continue
-
-# #             cluster = [val]
-# #             visited.add(i)
-
-# #             # Find similar values
-# #             for j in range(len(values)):
-# #                 if j not in visited and similarity_matrix[i][j] >= cluster_threshold:
-# #                     cluster.append(values[j])
-# #                     visited.add(j)
-
-# #             clusters.append(cluster)
-
-# #         return clusters
-
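The clustering is a single greedy pass over a pairwise similarity matrix; a self-contained toy version with a hand-written matrix shows the grouping behaviour:

def greedy_clusters(values, sim, threshold=0.4):
    # Same greedy pass as calculate_value_clusters above: each unvisited value
    # seeds a cluster and absorbs every other unvisited value whose
    # similarity clears the threshold.
    clusters, visited = [], set()
    for i, val in enumerate(values):
        if i in visited:
            continue
        cluster = [val]
        visited.add(i)
        for j in range(len(values)):
            if j not in visited and sim[i][j] >= threshold:
                cluster.append(values[j])
                visited.add(j)
        clusters.append(cluster)
    return clusters

vals = ["Living Room", "Lounge", "Bathroom"]
sim = [[1.0, 0.7, 0.1],   # hand-written similarities, not model output
       [0.7, 1.0, 0.1],
       [0.1, 0.1, 1.0]]
assert greedy_clusters(vals, sim) == [["Living Room", "Lounge"], ["Bathroom"]]
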
-# #     @staticmethod
-# #     def get_dynamic_threshold(
-# #         attr: str,
-# #         val: str,
-# #         base_score: float,
-# #         extracted_attrs: Dict[str, List[Dict[str, str]]],
-# #         relationships: Dict[str, float],
-# #         mandatory_attrs: Dict[str, List[str]],
-# #         base_threshold: float = 0.65,
-# #         boost_factor: float = 0.15
-# #     ) -> float:
-# #         """
-# #         Calculate dynamic threshold based on relationships with already-extracted attributes.
-# #         """
-# #         threshold = base_threshold
-
-# #         # Check relationships with already extracted attributes
-# #         max_relationship = 0.0
-# #         for other_attr, other_values_list in extracted_attrs.items():
-# #             if other_attr == attr:
-# #                 continue
-
-# #             for other_val_dict in other_values_list:
-# #                 other_val = other_val_dict['value']
-# #                 key = f"{attr}:{val}->{other_attr}:{other_val}"
-# #                 if key in relationships:
-# #                     max_relationship = max(max_relationship, relationships[key])
-
-# #         # If strong relationship exists, lower threshold
-# #         if max_relationship > 0.6:
-# #             threshold = base_threshold - (boost_factor * max_relationship)
-
-# #         return max(0.3, threshold)
-
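The arithmetic, isolated from the lookup plumbing: a strong cross-attribute relationship (above 0.6) lowers the acceptance bar, floored at 0.3.

def dynamic_threshold(max_relationship, base=0.65, boost=0.15):
    # Same formula as get_dynamic_threshold above.
    if max_relationship > 0.6:
        return max(0.3, base - boost * max_relationship)
    return base

assert dynamic_threshold(0.8) == 0.65 - 0.15 * 0.8   # 0.53
assert dynamic_threshold(0.5) == 0.65                # weak link: unchanged
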
-# #     @staticmethod
-# #     def get_adaptive_margin(
-# #         scores: List[Tuple[str, float]],
-# #         base_margin: float = 0.15,
-# #         max_margin: float = 0.22
-# #     ) -> float:
-# #         """
-# #         Calculate adaptive margin based on score distribution.
-# #         """
-# #         if len(scores) < 2:
-# #             return base_margin
-
-# #         score_values = [s for _, s in scores]
-# #         best_score = score_values[0]
-
-# #         # If best score is very low, use adaptive margin but be more conservative
-# #         if best_score < 0.5:
-# #             # Calculate score spread in top 3-4 scores only (more selective)
-# #             top_scores = score_values[:min(4, len(score_values))]
-# #             score_range = max(top_scores) - min(top_scores)
-
-# #             # Very controlled margin increase
-# #             if score_range < 0.30:
-# #                 # Much more conservative scaling
-# #                 score_factor = (0.5 - best_score) * 0.35
-# #                 adaptive = base_margin + score_factor + (0.30 - score_range) * 0.2
-# #                 return min(adaptive, max_margin)
-
-# #         return base_margin
-
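A worked example of the adaptive branch: with top scores 0.45 / 0.40 / 0.35 the margin grows from 0.15 to roughly 0.2075, still under the 0.22 cap.

# Numbers plugged into get_adaptive_margin above (illustrative):
best, top = 0.45, [0.45, 0.40, 0.35]   # best < 0.5 triggers the adaptive path
score_range = max(top) - min(top)      # 0.10, below the 0.30 gate
score_factor = (0.5 - best) * 0.35     # 0.0175
adaptive = 0.15 + score_factor + (0.30 - score_range) * 0.2
print(min(adaptive, 0.22))             # ~0.2075
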
-# #     @staticmethod
-# #     def _lexical_evidence(product_text: str, label: str) -> float:
-# #         """Calculate lexical overlap between product text and label."""
-# #         pt = product_text.lower()
-# #         tokens = [t for t in label.lower().replace("-", " ").split() if t]
-# #         if not tokens:
-# #             return 0.0
-# #         hits = sum(1 for t in tokens if t in pt)
-# #         return hits / len(tokens)
-
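E.g., the label "Living Room" scores 0.5 against a text that mentions only "room":

def lexical_evidence(product_text: str, label: str) -> float:
    # Same token-overlap ratio as _lexical_evidence above.
    pt = product_text.lower()
    tokens = [t for t in label.lower().replace("-", " ").split() if t]
    return sum(1 for t in tokens if t in pt) / len(tokens) if tokens else 0.0

assert lexical_evidence("Perfect for any room", "Living Room") == 0.5
assert lexical_evidence("Bright living room decor", "Living Room") == 1.0
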
-# #     @staticmethod
-# #     def normalize_against_product_text(
-# #     product_text: str,
-# #     mandatory_attrs: Dict[str, List[str]],
-# #     source_map: Dict[str, str],
-# #     threshold_abs: float = 0.65,
-# #     margin: float = 0.15,
-# #     allow_multiple: bool = False,
-# #     sem_weight: float = 0.8,
-# #     lex_weight: float = 0.2,
-# #     extracted_attrs: Optional[Dict[str, List[Dict[str, str]]]] = None,
-# #     relationships: Optional[Dict[str, float]] = None,
-# #     use_dynamic_thresholds: bool = True,
-# #     use_adaptive_margin: bool = True,
-# #     use_semantic_clustering: bool = True
-# # ) -> dict:
-# #         """
-# #         Score each allowed value against the product_text with dynamic thresholds.
-# #         Returns dict with values in array format: [{"value": "...", "source": "..."}]
-# #         """
-# #         if extracted_attrs is None:
-# #             extracted_attrs = {}
-# #         if relationships is None:
-# #             relationships = {}
-
-# #         pt_emb = model_embedder.encode(product_text, convert_to_tensor=True)
-# #         extracted = {}
-
-# #         for attr, allowed_values in mandatory_attrs.items():
-# #             scores: List[Tuple[str, float]] = []
-            
-# #             # Check if this is a dimension attribute
-# #             is_dimension_attr = any(keyword in attr.lower() for keyword in ['dimension', 'size', 'measurement'])
-            
-# #             # Normalize product text once for dimension matching
-# #             normalized_product_text = ProductAttributeService.normalize_dimension_text(product_text) if is_dimension_attr else ""
-
-# #             for val in allowed_values:
-# #                 # For dimension attributes, try exact normalized matching first
-# #                 if is_dimension_attr:
-# #                     # Normalize the allowed value from the list
-# #                     normalized_val = ProductAttributeService.normalize_dimension_text(val)
-                    
-# #                     # If we have both normalized values and they match exactly, give highest score
-# #                     if normalized_val and normalized_product_text and normalized_val == normalized_product_text:
-# #                         scores.append((val, 1.0))
-# #                         continue
-                    
-# #                     # Also check if the normalized value appears in the original product text
-# #                     # This handles cases where the format might be slightly different
-# #                     if normalized_val:
-# #                         # Extract just the numbers for flexible matching
-# #                         val_numbers = normalized_val.split('x')
-# #                         # Check if both numbers appear in the product text in close proximity
-# #                         text_lower = product_text.lower()
-# #                         if all(num in text_lower for num in val_numbers):
-# #                             # Calculate proximity score
-# #                             idx1 = text_lower.find(val_numbers[0])
-# #                             idx2 = text_lower.find(val_numbers[1])
-# #                             if idx1 != -1 and idx2 != -1:
-# #                                 distance = abs(idx2 - idx1)
-# #                                 # If numbers are close together (within 20 characters), high score
-# #                                 if distance < 20:
-# #                                     scores.append((val, 0.95))
-# #                                     continue
-                
-# #                 # Standard semantic matching for all attributes
-# #                 contexts = [val, f"for {val}", f"use in {val}", f"suitable for {val}", f"{val} room"]
-# #                 ctx_embs = [model_embedder.encode(c, convert_to_tensor=True) for c in contexts]
-# #                 sem_sim = max(float(util.cos_sim(pt_emb, ce).item()) for ce in ctx_embs)
-
-# #                 lex_score = ProductAttributeService._lexical_evidence(product_text, val)
-# #                 final_score = sem_weight * sem_sim + lex_weight * lex_score
-# #                 scores.append((val, final_score))
-
-# #             scores.sort(key=lambda x: x[1], reverse=True)
-# #             best_val, best_score = scores[0]
-
-# #             # Calculate adaptive margin if enabled
-# #             effective_margin = margin
-# #             if allow_multiple and use_adaptive_margin:
-# #                 effective_margin = ProductAttributeService.get_adaptive_margin(scores, margin)
-
-# #             # Special handling for dimension attributes with exact matches
-# #             # If we have a very high score (0.90+), it means we found an exact/normalized match
-# #             # In this case, don't apply multiple selection logic - just return the best match
-# #             if is_dimension_attr and best_score >= 0.90:
-# #                 source = ProductAttributeService.find_value_source(best_val, source_map, attr)
-# #                 extracted[attr] = [{"value": best_val, "source": source}]
-# #                 continue
-
-# #             if not allow_multiple:
-# #                 source = ProductAttributeService.find_value_source(best_val, source_map, attr)
-# #                 extracted[attr] = [{"value": best_val, "source": source}]
-# #             else:
-# #                 candidates = [best_val]
-# #                 use_base_threshold = best_score >= threshold_abs
-
-# #                 # Get semantic clusters if enabled
-# #                 clusters = []
-# #                 if use_semantic_clustering:
-# #                     clusters = ProductAttributeService.calculate_value_clusters(
-# #                         allowed_values, scores, cluster_threshold=0.4
-# #                     )
-# #                     best_cluster = next((c for c in clusters if best_val in c), [best_val])
-
-# #                 for val, sc in scores[1:]:
-# #                     # Skip values with very low scores
-# #                     min_score = 0.4 if is_dimension_attr else 0.3
-# #                     if sc < min_score:
-# #                         continue
-                    
-# #                     # Calculate dynamic threshold for this value
-# #                     if use_dynamic_thresholds and extracted_attrs:
-# #                         dynamic_thresh = ProductAttributeService.get_dynamic_threshold(
-# #                             attr, val, sc, extracted_attrs, relationships,
-# #                             mandatory_attrs, threshold_abs
-# #                         )
-# #                     else:
-# #                         dynamic_thresh = threshold_abs
-
-# #                     within_margin = (best_score - sc) <= effective_margin
-# #                     above_threshold = sc >= dynamic_thresh
-
-# #                     # Check if in same semantic cluster as best value
-# #                     in_cluster = False
-# #                     if use_semantic_clustering and clusters:
-# #                         in_cluster = any(best_val in c and val in c for c in clusters)
-
-# #                     if use_base_threshold:
-# #                         # Best score is good, require threshold OR (cluster + margin)
-# #                         if above_threshold and within_margin:
-# #                             candidates.append(val)
-# #                         elif in_cluster and within_margin:
-# #                             candidates.append(val)
-# #                     else:
-# #                         # Best score is low, use margin OR cluster logic
-# #                         if within_margin:
-# #                             candidates.append(val)
-# #                         elif in_cluster and (best_score - sc) <= effective_margin * 2.0:
-# #                             # Extended margin for cluster members
-# #                             candidates.append(val)
-
-# #                 # Map each candidate to its source and create array format
-# #                 extracted[attr] = []
-# #                 for candidate in candidates:
-# #                     source = ProductAttributeService.find_value_source(candidate, source_map, attr)
-# #                     extracted[attr].append({"value": candidate, "source": source})
-
-# #         return extracted
-
-
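Outside the dimension fast path, every allowed value is scored with the same 0.8 / 0.2 blend of semantic and lexical evidence (numbers illustrative):

sem_sim = 0.62     # best cos_sim over the contextual phrasings of one value
lex_score = 0.5    # token-overlap ratio from _lexical_evidence
final_score = 0.8 * sem_sim + 0.2 * lex_score
print(final_score)  # ~0.596, then compared against the (possibly dynamic) threshold
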
-# #     @staticmethod
-# #     def extract_attributes(
-# #         product_text: str,
-# #         mandatory_attrs: Dict[str, List[str]],
-# #         source_map: Dict[str, str] = None,
-# #         model: str = None,
-# #         extract_additional: bool = True,
-# #         multiple: Optional[List[str]] = None,
-# #         threshold_abs: float = 0.65,
-# #         margin: float = 0.15,
-# #         use_dynamic_thresholds: bool = True,
-# #         use_adaptive_margin: bool = True,
-# #         use_semantic_clustering: bool = True
-# #     ) -> dict:
-# #         """
-# #         Use Groq LLM to extract attributes from any product type with enhanced multi-value selection.
-# #         Now returns values in array format: [{"value": "...", "source": "..."}]
-# #         """
-        
-# #         if model is None:
-# #             model = settings.SUPPORTED_MODELS[0]
-
-# #         if multiple is None:
-# #             multiple = []
-
-# #         if source_map is None:
-# #             source_map = {}
-
-# #         # Check if product text is empty or minimal
-# #         if not product_text or product_text == "No product information available":
-# #             return ProductAttributeService._create_error_response(
-# #                 "No product information provided",
-# #                 mandatory_attrs,
-# #                 extract_additional
-# #             )
-
-# #         # Create structured prompt for mandatory attributes
-# #         mandatory_attr_list = []
-# #         for attr_name, allowed_values in mandatory_attrs.items():
-# #             mandatory_attr_list.append(f"{attr_name}: {', '.join(allowed_values)}")
-# #         mandatory_attr_text = "\n".join(mandatory_attr_list)
-
-# #         additional_instruction = ""
-# #         if extract_additional:
-# #             additional_instruction = """
-# # 2. Extract ADDITIONAL attributes: Identify any other relevant attributes from the product text 
-# #    that are NOT in the mandatory list. Only include attributes where you can find actual values
-# #    in the product text. Do NOT include attributes with "Not Specified" or empty values.
-   
-# #    Examples of attributes to look for (only if present): Brand, Material, Size, Color, Dimensions,
-# #    Weight, Features, Style, Theme, Pattern, Finish, Care Instructions, etc."""
-
-# #         output_format = {
-# #             "mandatory": {attr: "value or list of values" for attr in mandatory_attrs.keys()},
-# #         }
-
-# #         if extract_additional:
-# #             output_format["additional"] = {
-# #                 "example_attribute_1": "actual value found",
-# #                 "example_attribute_2": "actual value found"
-# #             }
-# #             output_format["additional"]["_note"] = "Only include attributes with actual values found in text"
-
-# #         prompt = f"""
-# # You are an intelligent product attribute extractor that works with ANY product type.
-
-# # TASK:
-# # 1. Extract MANDATORY attributes: For each mandatory attribute, select the most appropriate value(s)
-# #    from the provided list. Choose the value(s) that best match the product description.
-# # {additional_instruction}
-
-# # Product Text:
-# # {product_text}
-
-# # Mandatory Attribute Lists (MUST select from these allowed values):
-# # {mandatory_attr_text}
-
-# # CRITICAL INSTRUCTIONS:
-# # - Return ONLY valid JSON, nothing else
-# # - No explanations, no markdown, no text before or after the JSON
-# # - For mandatory attributes, choose the value(s) from the provided list that best match
-# # - If a mandatory attribute cannot be determined from the product text, use "Not Specified"
-# # - Prefer exact matches from the allowed values list over generic synonyms
-# # - If multiple values are plausible, you MAY return more than one
-# # {f"- For additional attributes: ONLY include attributes where you found actual values in the product text. DO NOT include attributes with 'Not Specified', 'None', 'N/A', or empty values. If you cannot find a value for an attribute, simply don't include that attribute." if extract_additional else ""}
-# # - Be precise and only extract information that is explicitly stated or clearly implied
-
-# # Required Output Format:
-# # {json.dumps(output_format, indent=2)}
-# #         """
-
-# #         payload = {
-# #             "model": model,
-# #             "messages": [
-# #                 {
-# #                     "role": "system",
-# #                     "content": f"You are a precise attribute extraction model. Return ONLY valid JSON with {'mandatory and additional' if extract_additional else 'mandatory'} sections. No explanations, no markdown, no other text."
-# #                 },
-# #                 {"role": "user", "content": prompt}
-# #             ],
-# #             "temperature": 0.0,
-# #             "max_tokens": 1500
-# #         }
-
-# #         headers = {
-# #             "Authorization": f"Bearer {settings.GROQ_API_KEY}",
-# #             "Content-Type": "application/json",
-# #         }
-
-# #         try:
-# #             response = requests.post(
-# #                 settings.GROQ_API_URL,
-# #                 headers=headers,
-# #                 json=payload,
-# #                 timeout=30
-# #             )
-# #             response.raise_for_status()
-# #             result_text = response.json()["choices"][0]["message"]["content"].strip()
-
-# #             # Clean the response
-# #             result_text = ProductAttributeService._clean_json_response(result_text)
-
-# #             # Parse JSON
-# #             parsed = json.loads(result_text)
-
-# #             # Validate and restructure with source tracking
-# #             parsed = ProductAttributeService._validate_response_structure(
-# #                 parsed, mandatory_attrs, extract_additional, source_map
-# #             )
-
-# #             # Clean up and add source tracking to additional attributes in array format
-# #             if extract_additional and "additional" in parsed:
-# #                 cleaned_additional = {}
-# #                 for k, v in parsed["additional"].items():
-# #                     if v and v not in ["Not Specified", "None", "N/A", "", "not specified", "none", "n/a"]:
-# #                         if not (isinstance(v, str) and v.lower() in ["not specified", "none", "n/a", ""]):
-# #                             # Convert to array format if not already
-# #                             if isinstance(v, list):
-# #                                 cleaned_additional[k] = []
-# #                                 for item in v:
-# #                                     if isinstance(item, dict) and "value" in item:
-# #                                         if "source" not in item:
-# #                                             item["source"] = ProductAttributeService.find_value_source(
-# #                                                 item["value"], source_map, k
-# #                                             )
-# #                                         cleaned_additional[k].append(item)
-# #                                     else:
-# #                                         source = ProductAttributeService.find_value_source(str(item), source_map, k)
-# #                                         cleaned_additional[k].append({"value": str(item), "source": source})
-# #                             else:
-# #                                 source = ProductAttributeService.find_value_source(str(v), source_map, k)
-# #                                 cleaned_additional[k] = [{"value": str(v), "source": source}]
-# #                 parsed["additional"] = cleaned_additional
-
-# #             # Calculate attribute relationships if using dynamic thresholds
-# #             relationships = {}
-# #             if use_dynamic_thresholds:
-# #                 relationships = ProductAttributeService.calculate_attribute_relationships(
-# #                     mandatory_attrs, product_text
-# #                 )
-
-# #             # Process attributes in order, allowing earlier ones to influence later ones
-# #             extracted_so_far = {}
-# #             for attr in mandatory_attrs.keys():
-# #                 allow_multiple = attr in multiple
-
-# #                 result = ProductAttributeService.normalize_against_product_text(
-# #                     product_text=product_text,
-# #                     mandatory_attrs={attr: mandatory_attrs[attr]},
-# #                     source_map=source_map,
-# #                     threshold_abs=threshold_abs,
-# #                     margin=margin,
-# #                     allow_multiple=allow_multiple,
-# #                     extracted_attrs=extracted_so_far,
-# #                     relationships=relationships,
-# #                     use_dynamic_thresholds=use_dynamic_thresholds,
-# #                     use_adaptive_margin=use_adaptive_margin,
-# #                     use_semantic_clustering=use_semantic_clustering
-# #                 )
-
-# #                 # Result is already in array format from normalize_against_product_text
-# #                 parsed["mandatory"][attr] = result[attr]
-# #                 extracted_so_far[attr] = result[attr]
-
-# #             return parsed
-
-# #         except requests.exceptions.RequestException as e:
-# #             return ProductAttributeService._create_error_response(
-# #                 str(e), mandatory_attrs, extract_additional
-# #             )
-# #         except json.JSONDecodeError as e:
-# #             return ProductAttributeService._create_error_response(
-# #                 f"Invalid JSON: {str(e)}", mandatory_attrs, extract_additional, result_text
-# #             )
-# #         except Exception as e:
-# #             return ProductAttributeService._create_error_response(
-# #                 str(e), mandatory_attrs, extract_additional
-# #             )
-
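The shape of a successful result, per the validation and normalization steps above (values illustrative):

result = {
    "mandatory": {
        "Room": [{"value": "Living Room", "source": "short_desc"}],
    },
    "additional": {
        "Material": [{"value": "Canvas", "source": "title"}],
    },
}
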
-# #     @staticmethod
-# #     def extract_attributes_batch(
-# #         products: List[Dict],
-# #         mandatory_attrs: Dict[str, List[str]],
-# #         model: str = None,
-# #         extract_additional: bool = True,
-# #         process_image: bool = True,
-# #         max_workers: int = 5,
-# #         multiple: Optional[List[str]] = None,
-# #         threshold_abs: float = 0.65,
-# #         margin: float = 0.15,
-# #         use_dynamic_thresholds: bool = True,
-# #         use_adaptive_margin: bool = True,
-# #         use_semantic_clustering: bool = True
-# #     ) -> Dict:
-# #         """Extract attributes for multiple products in parallel with enhanced multi-value selection and source tracking."""
-# #         results = []
-# #         successful = 0
-# #         failed = 0
-        
-# #         ocr_service = OCRService()
-
-# #         if multiple is None:
-# #             multiple = []
-
-# #         def process_product(product_data):
-# #             """Process a single product."""
-# #             product_id = product_data.get('product_id', f"product_{len(results)}")
-            
-# #             try:
-# #                 # Process image if URL is provided
-# #                 ocr_results = None
-# #                 ocr_text = None
-                
-# #                 if process_image and product_data.get('image_url'):
-# #                     ocr_results = ocr_service.process_image(product_data['image_url'])
-                    
-# #                     # Extract attributes from OCR
-# #                     if ocr_results and ocr_results.get('detected_text'):
-# #                         ocr_attrs = ProductAttributeService.extract_attributes_from_ocr(
-# #                             ocr_results, model
-# #                         )
-# #                         ocr_results['extracted_attributes'] = ocr_attrs
-                        
-# #                         # Format OCR text for combining with product text
-# #                         ocr_text = "\n".join([
-# #                             f"{item['text']} (confidence: {item['confidence']:.2f})"
-# #                             for item in ocr_results['detected_text']
-# #                         ])
-                
-# #                 # Combine all product information with source tracking
-# #                 product_text, source_map = ProductAttributeService.combine_product_text(
-# #                     title=product_data.get('title'),
-# #                     short_desc=product_data.get('short_desc'),
-# #                     long_desc=product_data.get('long_desc'),
-# #                     ocr_text=ocr_text
-# #                 )
-                
-# #                 # Extract attributes from combined text with enhanced features
-# #                 result = ProductAttributeService.extract_attributes(
-# #                     product_text=product_text,
-# #                     mandatory_attrs=mandatory_attrs,
-# #                     source_map=source_map,
-# #                     model=model,
-# #                     extract_additional=extract_additional,
-# #                     multiple=multiple,
-# #                     threshold_abs=threshold_abs,
-# #                     margin=margin,
-# #                     use_dynamic_thresholds=use_dynamic_thresholds,
-# #                     use_adaptive_margin=use_adaptive_margin,
-# #                     use_semantic_clustering=use_semantic_clustering
-# #                 )
-                
-# #                 result['product_id'] = product_id
-                
-# #                 # Add OCR results if available (already in correct format)
-# #                 if ocr_results:
-# #                     result['ocr_results'] = ocr_results
-                
-# #                 # Check if extraction was successful
-# #                 if 'error' not in result:
-# #                     return result, True
-# #                 else:
-# #                     return result, False
-                    
-# #             except Exception as e:
-# #                 return {
-# #                     'product_id': product_id,
-# #                     'mandatory': {attr: [{"value": "Not Specified", "source": "error"}] for attr in mandatory_attrs.keys()},
-# #                     'additional': {} if extract_additional else None,
-# #                     'error': f"Processing error: {str(e)}"
-# #                 }, False
-
-# #         # Process products in parallel
-# #         with ThreadPoolExecutor(max_workers=max_workers) as executor:
-# #             future_to_product = {
-# #                 executor.submit(process_product, product): product 
-# #                 for product in products
-# #             }
-            
-# #             for future in as_completed(future_to_product):
-# #                 try:
-# #                     result, success = future.result()
-# #                     results.append(result)
-# #                     if success:
-# #                         successful += 1
-# #                     else:
-# #                         failed += 1
-# #                 except Exception as e:
-# #                     failed += 1
-# #                     results.append({
-# #                         'product_id': 'unknown',
-# #                         'mandatory': {attr: [{"value": "Not Specified", "source": "error"}] for attr in mandatory_attrs.keys()},
-# #                         'additional': {} if extract_additional else None,
-# #                         'error': f"Unexpected error: {str(e)}"
-# #                     })
-
-# #         return {
-# #             'results': results,
-# #             'total_products': len(products),
-# #             'successful': successful,
-# #             'failed': failed
-# #         }
-
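The batch path is a plain ThreadPoolExecutor fan-out; a self-contained skeleton of the same pattern:

from concurrent.futures import ThreadPoolExecutor, as_completed

def process(item):
    # Stand-in for process_product above; returns (result, success_flag).
    return {"product_id": item}, True

items = ["p1", "p2", "p3"]
results, successful, failed = [], 0, 0
with ThreadPoolExecutor(max_workers=5) as executor:
    futures = {executor.submit(process, item): item for item in items}
    for future in as_completed(futures):
        result, ok = future.result()
        results.append(result)
        successful += ok
        failed += not ok
print(successful, failed)   # 3 0
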
-# #     @staticmethod
-# #     def _clean_json_response(text: str) -> str:
-# #         """Clean LLM response to extract valid JSON."""
-# #         start_idx = text.find('{')
-# #         end_idx = text.rfind('}')
-
-# #         if start_idx != -1 and end_idx != -1:
-# #             text = text[start_idx:end_idx + 1]
-
-# #         if "```json" in text:
-# #             text = text.split("```json")[1].split("```")[0].strip()
-# #         elif "```" in text:
-# #             text = text.split("```")[1].split("```")[0].strip()
-# #             if text.startswith("json"):
-# #                 text = text[4:].strip()
-
-# #         return text
-
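Example: a fenced LLM reply reduced to bare JSON. The brace slice alone already removes the fences and any preamble, which is why the fence-stripping branch after it rarely fires.

raw = 'Here you go:\n```json\n{"mandatory": {"Room": "Living Room"}}\n```'
cleaned = raw[raw.find('{'):raw.rfind('}') + 1]
print(cleaned)   # {"mandatory": {"Room": "Living Room"}}
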
-# #     @staticmethod
-# #     def _validate_response_structure(
-# #         parsed: dict,
-# #         mandatory_attrs: Dict[str, List[str]],
-# #         extract_additional: bool,
-# #         source_map: Dict[str, str] = None
-# #     ) -> dict:
-# #         """Validate and fix the response structure, ensuring array format with source tracking."""
-# #         if source_map is None:
-# #             source_map = {}
-        
-# #         expected_sections = ["mandatory"]
-# #         if extract_additional:
-# #             expected_sections.append("additional")
-
-# #         if not all(section in parsed for section in expected_sections):
-# #             if isinstance(parsed, dict):
-# #                 mandatory_keys = set(mandatory_attrs.keys())
-# #                 mandatory = {k: v for k, v in parsed.items() if k in mandatory_keys}
-# #                 additional = {k: v for k, v in parsed.items() if k not in mandatory_keys}
-
-# #                 result = {"mandatory": mandatory}
-# #                 if extract_additional:
-# #                     result["additional"] = additional
-# #                 parsed = result
-# #             else:
-# #                 return ProductAttributeService._create_error_response(
-# #                     "Invalid response structure",
-# #                     mandatory_attrs,
-# #                     extract_additional,
-# #                     str(parsed)
-# #                 )
-
-# #         # Convert mandatory attributes to array format with source tracking
-# #         if "mandatory" in parsed:
-# #             converted_mandatory = {}
-# #             for attr, value in parsed["mandatory"].items():
-# #                 if isinstance(value, list):
-# #                     # Already in array format, ensure each item has source
-# #                     converted_mandatory[attr] = []
-# #                     for item in value:
-# #                         if isinstance(item, dict) and "value" in item:
-# #                             # Already has proper structure
-# #                             if "source" not in item:
-# #                                 item["source"] = ProductAttributeService.find_value_source(
-# #                                     item["value"], source_map, attr
-# #                                 )
-# #                             converted_mandatory[attr].append(item)
-# #                         else:
-# #                             # Convert string to proper format
-# #                             source = ProductAttributeService.find_value_source(str(item), source_map, attr)
-# #                             converted_mandatory[attr].append({"value": str(item), "source": source})
-# #                 else:
-# #                     # Single value - convert to array format
-# #                     source = ProductAttributeService.find_value_source(str(value), source_map, attr)
-# #                     converted_mandatory[attr] = [{"value": str(value), "source": source}]
-            
-# #             parsed["mandatory"] = converted_mandatory
-
-# #         return parsed
-
-# #     @staticmethod
-# #     def _create_error_response(
-# #         error: str,
-# #         mandatory_attrs: Dict[str, List[str]],
-# #         extract_additional: bool,
-# #         raw_output: Optional[str] = None
-# #     ) -> dict:
-# #         """Create a standardized error response in array format."""
-# #         response = {
-# #             "mandatory": {attr: [{"value": "Not Specified", "source": "error"}] for attr in mandatory_attrs.keys()},
-# #             "error": error
-# #         }
-# #         if extract_additional:
-# #             response["additional"] = {}
-# #         if raw_output:
-# #             response["raw_output"] = raw_output
-# #         return response
-
-
-
-
-
-
-
-
-
-
-# # ==================== services.py (OPTIMIZED) ====================
-# import requests
-# import json
-# import re
-# import hashlib
-# import logging
-# from typing import Dict, List, Optional, Tuple
-# from django.conf import settings
-# from concurrent.futures import ThreadPoolExecutor, as_completed
-# from sentence_transformers import SentenceTransformer, util
-# import numpy as np
-# from .ocr_service import OCRService
-
-# logger = logging.getLogger(__name__)
-
-# # Initialize embedding model for normalization (SINGLETON)
-# model_embedder = SentenceTransformer("all-MiniLM-L6-v2")
-
-
-# # ==================== CACHING CLASSES ====================
-
-# class SimpleCache:
-#     """
-#     In-memory cache for attribute extraction results.
-#     No Redis required - uses Python dict with automatic size management.
-#     """
-#     _cache = {}
-#     _max_size = 1000  # Maximum number of cached items
-    
-#     @classmethod
-#     def get(cls, key: str) -> Optional[Dict]:
-#         """Get cached value by key"""
-#         return cls._cache.get(key)
-    
-#     @classmethod
-#     def set(cls, key: str, value: Dict):
-#         """Set cache value with automatic LRU cleanup"""
-#         # Simple FIFO-style eviction: drop the oldest 20% when the cache is full
-#         if len(cls._cache) >= cls._max_size:
-#             items = list(cls._cache.items())
-#             # Keep newest 80%
-#             cls._cache = dict(items[int(cls._max_size * 0.2):])
-#             logger.info(f"Cache cleaned: kept {len(cls._cache)} items")
-        
-#         cls._cache[key] = value
-    
-#     @classmethod
-#     def clear(cls):
-#         """Clear entire cache"""
-#         cls._cache.clear()
-#         logger.info("Cache cleared")
-    
-#     @classmethod
-#     def get_stats(cls) -> Dict:
-#         """Get cache statistics"""
-#         return {
-#             "size": len(cls._cache),
-#             "max_size": cls._max_size,
-#             "usage_percent": round(len(cls._cache) / cls._max_size * 100, 2)
-#         }
-
-
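The read-through pattern SimpleCache is built for, as a self-contained sketch:

_cache = {}   # stand-in for SimpleCache._cache

def get_or_compute(key, compute):
    # Look up first, compute on a miss, store for the next caller.
    if key not in _cache:
        _cache[key] = compute()
    return _cache[key]

first = get_or_compute("attr_abc", lambda: {"mandatory": {}})   # computed
second = get_or_compute("attr_abc", lambda: {"mandatory": {}})  # cache hit
assert first is second
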
-# class EmbeddingCache:
-#     """
-#     Cache for sentence transformer embeddings.
-#     Significantly reduces embedding computation time.
-#     """
-#     _cache = {}
-#     _max_size = 500
-#     _hit_count = 0
-#     _miss_count = 0
-    
-#     @classmethod
-#     def get_embedding(cls, text: str, model):
-#         """Get or compute embedding with caching"""
-#         if text in cls._cache:
-#             cls._hit_count += 1
-#             return cls._cache[text]
-        
-#         # Cache miss - compute embedding
-#         cls._miss_count += 1
-        
-#         # Auto-cleanup if cache is full
-#         if len(cls._cache) >= cls._max_size:
-#             items = list(cls._cache.items())
-#             cls._cache = dict(items[int(cls._max_size * 0.3):])
-#             logger.info(f"Embedding cache cleaned: kept {len(cls._cache)} items")
-        
-#         # Compute and cache
-#         embedding = model.encode(text, convert_to_tensor=True)
-#         cls._cache[text] = embedding
-#         return embedding
-    
-#     @classmethod
-#     def clear(cls):
-#         """Clear embedding cache"""
-#         cls._cache.clear()
-#         cls._hit_count = 0
-#         cls._miss_count = 0
-#         logger.info("Embedding cache cleared")
-    
-#     @classmethod
-#     def get_stats(cls) -> Dict:
-#         """Get cache statistics"""
-#         total = cls._hit_count + cls._miss_count
-#         hit_rate = (cls._hit_count / total * 100) if total > 0 else 0
-#         return {
-#             "size": len(cls._cache),
-#             "max_size": cls._max_size,
-#             "hits": cls._hit_count,
-#             "misses": cls._miss_count,
-#             "hit_rate_percent": round(hit_rate, 2)
-#         }
-
-
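The embedding cache amounts to memoising model.encode by input string; the core, minus eviction and hit counters (requires sentence-transformers, as imported above):

from sentence_transformers import SentenceTransformer

_model = SentenceTransformer("all-MiniLM-L6-v2")
_embeddings = {}

def get_embedding(text: str):
    # Encode each distinct string once; return the cached tensor afterwards.
    if text not in _embeddings:
        _embeddings[text] = _model.encode(text, convert_to_tensor=True)
    return _embeddings[text]
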
-# # ==================== MAIN SERVICE CLASS ====================
-
-# class ProductAttributeService:
-#     """Service class for extracting product attributes using Groq LLM."""
-
-#     @staticmethod
-#     def _generate_cache_key(product_text: str, mandatory_attrs: Dict) -> str:
-#         """
-#         Generate a unique cache key from product text and attributes.
-#         Uses MD5 hash for consistent short keys.
-#         """
-#         # Sort attributes for consistent hashing
-#         attrs_str = json.dumps(mandatory_attrs, sort_keys=True)
-#         content = f"{product_text}:{attrs_str}"
-#         return f"attr_{hashlib.md5(content.encode()).hexdigest()}"
-
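Key stability relies on sort_keys=True: the same attribute dict hashes to the same key regardless of insertion order.

import hashlib
import json

def cache_key(product_text, mandatory_attrs):
    # Same scheme as _generate_cache_key above.
    attrs_str = json.dumps(mandatory_attrs, sort_keys=True)
    return f"attr_{hashlib.md5(f'{product_text}:{attrs_str}'.encode()).hexdigest()}"

a = cache_key("canvas 16x20", {"Room": ["Living Room"], "Style": ["Modern"]})
b = cache_key("canvas 16x20", {"Style": ["Modern"], "Room": ["Living Room"]})
assert a == b   # insertion order does not change the key
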
-#     @staticmethod
-#     def normalize_dimension_text(text: str) -> str:
-#         """
-#         Normalize dimension text to match format like '16x20', '20x30', etc.
-#         Handles formats like '16 x 20', '16x1.5x20', '16 x 1.5 x 20 Inches'
-#         Returns the normalized dimension (e.g., '16x20') or empty string if not found.
-#         """
-#         if not text:
-#             return ""
-        
-#         # Convert to lowercase and remove common units
-#         text = text.lower()
-#         text = re.sub(r'\s*(inches|inch|in|cm|centimeters|mm|millimeters)\s*', '', text, flags=re.IGNORECASE)
-        
-#         # Extract all numbers from the text
-#         numbers = re.findall(r'\d+\.?\d*', text)
-        
-#         if not numbers:
-#             return ""
-        
-#         # Convert to floats first to handle decimals properly
-#         float_numbers = []
-#         for num in numbers:
-#             try:
-#                 float_numbers.append(float(num))
-#             except ValueError:
-#                 continue
-        
-#         if len(float_numbers) < 2:
-#             return ""
-        
-#         # If we have 3 dimensions, it's likely Width x Depth x Height
-#         # For wall art, depth is usually small (< 5), so we keep first and last
-#         if len(float_numbers) == 3:
-#             # Keep first and last values (width and height), skip middle (depth)
-#             float_numbers = [float_numbers[0], float_numbers[2]]
-#         elif len(float_numbers) > 3:
-#             # If more than 3 dimensions, keep the two largest
-#             float_numbers = sorted(float_numbers)[-2:]
-#         else:
-#             # Just 2 dimensions, use as is
-#             float_numbers = float_numbers[:2]
-        
-#         # Format numbers: use integer if whole, else one decimal
-#         formatted_numbers = []
-#         for num in float_numbers:
-#             if num.is_integer():
-#                 formatted_numbers.append(str(int(num)))
-#             else:
-#                 formatted_numbers.append(f"{num:.1f}")
-        
-#         # Sort to ensure consistent order (smaller x larger)
-#         formatted_numbers.sort(key=lambda x: float(x))
-        
-#         # Return formatted dimension
-#         return f"{formatted_numbers[0]}x{formatted_numbers[1]}"
-    
-#     @staticmethod
-#     def normalize_value_for_matching(value: str, attr_name: str = "") -> str:
-#         """
-#         Normalize a value based on its attribute type for better matching.
-#         Currently handles dimensions specially, can be extended for other attributes.
-#         """
-#         # Check if this is a dimension-related attribute
-#         dimension_keywords = ['dimension', 'size', 'measurement']
-#         if any(keyword in attr_name.lower() for keyword in dimension_keywords):
-#             normalized = ProductAttributeService.normalize_dimension_text(value)
-#             if normalized:
-#                 return normalized
-        
-#         # For other attributes, just return cleaned value
-#         return value.strip()
-
-#     @staticmethod
-#     def combine_product_text(
-#         title: Optional[str] = None,
-#         short_desc: Optional[str] = None,
-#         long_desc: Optional[str] = None,
-#         ocr_text: Optional[str] = None
-#     ) -> Tuple[str, Dict[str, str]]:
-#         """
-#         Combine product metadata into a single text block.
-#         Returns: (combined_text, source_map) where source_map tracks which text came from where
-#         """
-#         parts = []
-#         source_map = {}
-        
-#         if title:
-#             title_str = str(title).strip()
-#             parts.append(f"Title: {title_str}")
-#             source_map['title'] = title_str
-#         if short_desc:
-#             short_str = str(short_desc).strip()
-#             parts.append(f"Description: {short_str}")
-#             source_map['short_desc'] = short_str
-#         if long_desc:
-#             long_str = str(long_desc).strip()
-#             parts.append(f"Details: {long_str}")
-#             source_map['long_desc'] = long_str
-#         if ocr_text:
-#             parts.append(f"OCR Text: {ocr_text}")
-#             source_map['ocr_text'] = ocr_text
-        
-#         combined = "\n".join(parts).strip()
-        
-#         if not combined:
-#             return "No product information available", {}
-        
-#         return combined, source_map
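A minimal usage sketch (field values invented; assumes the class above is importable):

text, sources = ProductAttributeService.combine_product_text(
    title="Abstract Canvas Wall Art",
    short_desc="Framed print, 16 x 20 Inches",
)
# text    == "Title: Abstract Canvas Wall Art\nDescription: Framed print, 16 x 20 Inches"
# sources == {"title": "Abstract Canvas Wall Art",
#             "short_desc": "Framed print, 16 x 20 Inches"}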
-
-#     @staticmethod
-#     def find_value_source(value: str, source_map: Dict[str, str], attr_name: str = "") -> str:
-#         """
-#         Find which source(s) contain the given value.
-#         Returns the source name(s) where the value appears.
-#         Now handles normalized matching for dimensions.
-#         """
-#         value_lower = value.lower()
-#         # Split value into tokens for better matching
-#         # Split "x" only between digits so words like "box" keep their letters
-#         value_tokens = set(re.sub(r"(?<=\d)x(?=\d)", " ", value_lower.replace("-", " ")).split())
-        
-#         # Check if this is a dimension-related attribute
-#         is_dimension_attr = any(keyword in attr_name.lower() for keyword in ['dimension', 'size', 'measurement'])
-        
-#         sources_found = []
-#         source_scores = {}
-        
-#         for source_name, source_text in source_map.items():
-#             source_lower = source_text.lower()
-            
-#             # Check for exact phrase match first
-#             if value_lower in source_lower:
-#                 source_scores[source_name] = 1.0
-#                 continue
-            
-#             # For dimensions, check normalized match
-#             if is_dimension_attr:
-#                 # Normalize the value (e.g., "16x20" stays "16x20")
-#                 normalized_value = ProductAttributeService.normalize_dimension_text(value)
-#                 if not normalized_value:
-#                     normalized_value = value.replace("x", " ").strip()
-                
-#                 # Normalize the source text to extract dimensions
-#                 normalized_source = ProductAttributeService.normalize_dimension_text(source_text)
-                
-#                 # Direct match
-#                 if normalized_value == normalized_source:
-#                     source_scores[source_name] = 0.95
-#                     continue
-                
-#                 # Also check if the dimension numbers appear in the source
-#                 # Extract dimension parts (e.g., "16x20" -> ["16", "20"])
-#                 dim_parts = normalized_value.split("x") if "x" in normalized_value else []
-#                 if len(dim_parts) == 2:
-#                     # Check if both numbers appear in the source
-#                     if all(part in source_text for part in dim_parts):
-#                         source_scores[source_name] = 0.85
-#                         continue
-            
-#             # Check for token matches
-#             token_matches = sum(1 for token in value_tokens if token and token in source_lower)
-#             if token_matches > 0 and len(value_tokens) > 0:
-#                 source_scores[source_name] = token_matches / len(value_tokens)
-        
-#         # Return source with highest score, or all sources if multiple have same score
-#         if source_scores:
-#             max_score = max(source_scores.values())
-#             sources_found = [s for s, score in source_scores.items() if score == max_score]
-            
-#             # Prioritize: title > short_desc > long_desc > ocr_text
-#             priority = ['title', 'short_desc', 'long_desc', 'ocr_text']
-#             for p in priority:
-#                 if p in sources_found:
-#                     return p
-            
-#             return sources_found[0] if sources_found else "Not found"
-        
-#         return "Not found"
-
-#     @staticmethod
-#     def format_visual_attributes(visual_attributes: Dict) -> Dict:
-#         """
-#         Convert visual attributes to array format with source tracking.
-#         Source is always 'image' for visual attributes.
-#         """
-#         formatted = {}
-        
-#         for key, value in visual_attributes.items():
-#             if isinstance(value, list):
-#                 # Already a list (like color_palette)
-#                 formatted[key] = [{"value": str(item), "source": "image"} for item in value]
-#             elif isinstance(value, dict):
-#                 # Nested dictionary - format recursively
-#                 nested_formatted = {}
-#                 for nested_key, nested_value in value.items():
-#                     if isinstance(nested_value, list):
-#                         nested_formatted[nested_key] = [{"value": str(item), "source": "image"} for item in nested_value]
-#                     else:
-#                         nested_formatted[nested_key] = [{"value": str(nested_value), "source": "image"}]
-#                 formatted[key] = nested_formatted
-#             else:
-#                 # Single value
-#                 formatted[key] = [{"value": str(value), "source": "image"}]
-        
-#         return formatted
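Shape of the transformation, with invented keys and values:

visual = {"dominant_color": "navy", "color_palette": ["navy", "gold"]}
# format_visual_attributes(visual) ->
# {"dominant_color": [{"value": "navy", "source": "image"}],
#  "color_palette":  [{"value": "navy", "source": "image"},
#                     {"value": "gold", "source": "image"}]}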
-
-#     @staticmethod
-#     def extract_attributes_from_ocr(ocr_results: Dict, model: str = None) -> Dict:
-#         """Extract structured attributes from OCR text using LLM."""
-#         if model is None:
-#             model = settings.SUPPORTED_MODELS[0]
-        
-#         detected_text = ocr_results.get('detected_text', [])
-#         if not detected_text:
-#             return {}
-        
-#         # Format OCR text for prompt
-#         ocr_text = "\n".join([f"Text: {item['text']}, Confidence: {item['confidence']:.2f}" 
-#                               for item in detected_text])
-        
-#         prompt = f"""
-# You are an AI model that extracts structured attributes from OCR text detected on product images.
-# Given the OCR detections below, infer the possible product attributes and return them as a clean JSON object.
-
-# OCR Text:
-# {ocr_text}
-
-# Extract relevant attributes like:
-# - brand
-# - model_number
-# - size (waist_size, length, etc.)
-# - collection
-# - any other relevant product information
-
-# Return a JSON object with only the attributes you can confidently identify.
-# If an attribute is not present, do not include it in the response.
-# """
-        
-#         payload = {
-#             "model": model,
-#             "messages": [
-#                 {
-#                     "role": "system",
-#                     "content": "You are a helpful AI that extracts structured data from OCR output. Return only valid JSON."
-#                 },
-#                 {"role": "user", "content": prompt}
-#             ],
-#             "temperature": 0.2,
-#             "max_tokens": 500
-#         }
-        
-#         headers = {
-#             "Authorization": f"Bearer {settings.GROQ_API_KEY}",
-#             "Content-Type": "application/json",
-#         }
-        
-#         try:
-#             response = requests.post(
-#                 settings.GROQ_API_URL,
-#                 headers=headers,
-#                 json=payload,
-#                 timeout=30
-#             )
-#             response.raise_for_status()
-#             result_text = response.json()["choices"][0]["message"]["content"].strip()
-            
-#             # Clean and parse JSON
-#             result_text = ProductAttributeService._clean_json_response(result_text)
-#             parsed = json.loads(result_text)
-            
-#             # Convert to array format with source tracking
-#             formatted_attributes = {}
-#             for key, value in parsed.items():
-#                 if key == "error":
-#                     continue
-                
-#                 # Handle nested dictionaries (like size)
-#                 if isinstance(value, dict):
-#                     nested_formatted = {}
-#                     for nested_key, nested_value in value.items():
-#                         nested_formatted[nested_key] = [{"value": str(nested_value), "source": "image"}]
-#                     formatted_attributes[key] = nested_formatted
-#                 elif isinstance(value, list):
-#                     # Already a list, convert each item
-#                     formatted_attributes[key] = [{"value": str(item), "source": "image"} for item in value]
-#                 else:
-#                     # Single value
-#                     formatted_attributes[key] = [{"value": str(value), "source": "image"}]
-            
-#             return formatted_attributes
-#         except Exception as e:
-#             logger.error(f"OCR attribute extraction failed: {str(e)}")
-#             return {"error": f"Failed to extract attributes from OCR: {str(e)}"}
-
-#     @staticmethod
-#     def calculate_attribute_relationships(
-#         mandatory_attrs: Dict[str, List[str]],
-#         product_text: str
-#     ) -> Dict[str, float]:
-#         """
-#         Calculate semantic relationships between attribute values across different attributes.
-#         Returns a matrix of cross-attribute value similarities.
-#         """
-#         # USE EMBEDDING CACHE
-#         pt_emb = EmbeddingCache.get_embedding(product_text, model_embedder)
-
-#         # Calculate similarities between all attribute values and product text
-#         attr_scores = {}
-#         for attr, values in mandatory_attrs.items():
-#             attr_scores[attr] = {}
-#             for val in values:
-#                 contexts = [val, f"for {val}", f"use in {val}", f"suitable for {val}"]
-#                 # USE EMBEDDING CACHE FOR CONTEXTS
-#                 ctx_embs = [EmbeddingCache.get_embedding(c, model_embedder) for c in contexts]
-#                 sem_sim = max(float(util.cos_sim(pt_emb, ce).item()) for ce in ctx_embs)
-#                 attr_scores[attr][val] = sem_sim
-
-#         # Calculate cross-attribute value relationships
-#         relationships = {}
-#         attr_list = list(mandatory_attrs.keys())
-
-#         for i, attr1 in enumerate(attr_list):
-#             for attr2 in attr_list[i+1:]:
-#                 # Calculate pairwise similarities between values of different attributes
-#                 for val1 in mandatory_attrs[attr1]:
-#                     for val2 in mandatory_attrs[attr2]:
-#                         # USE EMBEDDING CACHE
-#                         emb1 = EmbeddingCache.get_embedding(val1, model_embedder)
-#                         emb2 = EmbeddingCache.get_embedding(val2, model_embedder)
-#                         sim = float(util.cos_sim(emb1, emb2).item())
-
-#                         # Store bidirectional relationships
-#                         key1 = f"{attr1}:{val1}->{attr2}:{val2}"
-#                         key2 = f"{attr2}:{val2}->{attr1}:{val1}"
-#                         relationships[key1] = sim
-#                         relationships[key2] = sim
-
-#         return relationships
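Note that the nested loops embed and compare every value pair across every attribute pair, so cost grows quadratically with the size of the value lists. The key layout (attribute and value names invented):

key_fwd = "room_type:Bedroom->style:Modern"   # f"{attr1}:{val1}->{attr2}:{val2}"
key_rev = "style:Modern->room_type:Bedroom"
# relationships[key_fwd] == relationships[key_rev]; both hold the cosine
# similarity between the two value embeddings.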
-
-#     @staticmethod
-#     def calculate_value_clusters(
-#         values: List[str],
-#         scores: List[Tuple[str, float]],
-#         cluster_threshold: float = 0.4
-#     ) -> List[List[str]]:
-#         """
-#         Group values into semantic clusters based on their similarity to each other.
-#         Returns clusters of related values.
-#         """
-#         if len(values) <= 1:
-#             return [[val] for val, _ in scores]
-
-#         # Get embeddings for all values - USE CACHE
-#         embeddings = [EmbeddingCache.get_embedding(val, model_embedder) for val in values]
-
-#         # Calculate pairwise similarities
-#         similarity_matrix = np.zeros((len(values), len(values)))
-#         for i in range(len(values)):
-#             for j in range(i+1, len(values)):
-#                 sim = float(util.cos_sim(embeddings[i], embeddings[j]).item())
-#                 similarity_matrix[i][j] = sim
-#                 similarity_matrix[j][i] = sim
-
-#         # Simple clustering: group values with high similarity
-#         clusters = []
-#         visited = set()
-
-#         for i, (val, score) in enumerate(scores):
-#             if i in visited:
-#                 continue
-
-#             cluster = [val]
-#             visited.add(i)
-
-#             # Find similar values
-#             for j in range(len(values)):
-#                 if j not in visited and similarity_matrix[i][j] >= cluster_threshold:
-#                     cluster.append(values[j])
-#                     visited.add(j)
-
-#             clusters.append(cluster)
-
-#         return clusters
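A self-contained re-run of the greedy grouping with an invented similarity matrix, to make the single-pass behaviour concrete - a value joins the first cluster whose seed it resembles:

import numpy as np

values = ["Living Room", "Lounge", "Bathroom"]
sim = np.array([[1.0, 0.8, 0.1],
                [0.8, 1.0, 0.2],
                [0.1, 0.2, 1.0]])   # invented cosine similarities

clusters, visited = [], set()
for i, val in enumerate(values):     # assumes score order matches value order
    if i in visited:
        continue
    cluster = [val]
    visited.add(i)
    for j in range(len(values)):
        if j not in visited and sim[i][j] >= 0.4:
            cluster.append(values[j])
            visited.add(j)
    clusters.append(cluster)

print(clusters)  # [['Living Room', 'Lounge'], ['Bathroom']]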
-
-#     @staticmethod
-#     def get_dynamic_threshold(
-#         attr: str,
-#         val: str,
-#         base_score: float,
-#         extracted_attrs: Dict[str, List[Dict[str, str]]],
-#         relationships: Dict[str, float],
-#         mandatory_attrs: Dict[str, List[str]],
-#         base_threshold: float = 0.65,
-#         boost_factor: float = 0.15
-#     ) -> float:
-#         """
-#         Calculate dynamic threshold based on relationships with already-extracted attributes.
-#         """
-#         threshold = base_threshold
-
-#         # Check relationships with already extracted attributes
-#         max_relationship = 0.0
-#         for other_attr, other_values_list in extracted_attrs.items():
-#             if other_attr == attr:
-#                 continue
-
-#             for other_val_dict in other_values_list:
-#                 other_val = other_val_dict['value']
-#                 key = f"{attr}:{val}->{other_attr}:{other_val}"
-#                 if key in relationships:
-#                     max_relationship = max(max_relationship, relationships[key])
-
-#         # If strong relationship exists, lower threshold
-#         if max_relationship > 0.6:
-#             threshold = base_threshold - (boost_factor * max_relationship)
-
-#         return max(0.3, threshold)
-
-#     @staticmethod
-#     def get_adaptive_margin(
-#         scores: List[Tuple[str, float]],
-#         base_margin: float = 0.15,
-#         max_margin: float = 0.22
-#     ) -> float:
-#         """
-#         Calculate adaptive margin based on score distribution.
-#         """
-#         if len(scores) < 2:
-#             return base_margin
-
-#         score_values = [s for _, s in scores]
-#         best_score = score_values[0]
-
-#         # If best score is very low, use adaptive margin but be more conservative
-#         if best_score < 0.5:
-#             # Calculate score spread in top 3-4 scores only (more selective)
-#             top_scores = score_values[:min(4, len(score_values))]
-#             score_range = max(top_scores) - min(top_scores)
-
-#             # Very controlled margin increase
-#             if score_range < 0.30:
-#                 # Much more conservative scaling
-#                 score_factor = (0.5 - best_score) * 0.35
-#                 adaptive = base_margin + score_factor + (0.30 - score_range) * 0.2
-#                 return min(adaptive, max_margin)
-
-#         return base_margin
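A worked pass through the adaptive branch (scores invented):

scores = [("Modern", 0.42), ("Contemporary", 0.38),
          ("Minimalist", 0.35), ("Rustic", 0.30)]
# best_score = 0.42 < 0.5; top-4 range = 0.42 - 0.30 = 0.12 < 0.30, so:
#   score_factor = (0.5 - 0.42) * 0.35                = 0.028
#   adaptive     = 0.15 + 0.028 + (0.30 - 0.12) * 0.2 = 0.214
# get_adaptive_margin(scores) -> min(0.214, 0.22) = 0.214 (vs. base 0.15)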
-
-#     @staticmethod
-#     def _lexical_evidence(product_text: str, label: str) -> float:
-#         """Calculate lexical overlap between product text and label."""
-#         pt = product_text.lower()
-#         tokens = [t for t in label.lower().replace("-", " ").split() if t]
-#         if not tokens:
-#             return 0.0
-#         hits = sum(1 for t in tokens if t in pt)
-#         return hits / len(tokens)
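Worked example - two of the three label tokens occur in the text:

text = "hand-painted canvas for the living room"
label = "Living Room Decor"
# tokens: ["living", "room", "decor"]; hits: "living", "room"
# _lexical_evidence(text, label) -> 2 / 3 ~= 0.67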
-
-#     @staticmethod
-#     def normalize_against_product_text(
-#         product_text: str,
-#         mandatory_attrs: Dict[str, List[str]],
-#         source_map: Dict[str, str],
-#         threshold_abs: float = 0.65,
-#         margin: float = 0.15,
-#         allow_multiple: bool = False,
-#         sem_weight: float = 0.8,
-#         lex_weight: float = 0.2,
-#         extracted_attrs: Optional[Dict[str, List[Dict[str, str]]]] = None,
-#         relationships: Optional[Dict[str, float]] = None,
-#         use_dynamic_thresholds: bool = True,
-#         use_adaptive_margin: bool = True,
-#         use_semantic_clustering: bool = True
-#     ) -> dict:
-#         """
-#         Score each allowed value against the product_text with dynamic thresholds.
-#         Returns dict with values in array format: [{"value": "...", "source": "..."}]
-        
-#         ⚡ OPTIMIZED: Uses EmbeddingCache for faster computation
-#         """
-#         if extracted_attrs is None:
-#             extracted_attrs = {}
-#         if relationships is None:
-#             relationships = {}
-
-#         # USE EMBEDDING CACHE - CRITICAL OPTIMIZATION
-#         pt_emb = EmbeddingCache.get_embedding(product_text, model_embedder)
-#         extracted = {}
-
-#         for attr, allowed_values in mandatory_attrs.items():
-#             scores: List[Tuple[str, float]] = []
-            
-#             # Check if this is a dimension attribute
-#             is_dimension_attr = any(keyword in attr.lower() for keyword in ['dimension', 'size', 'measurement'])
-            
-#             # Normalize product text once for dimension matching
-#             normalized_product_text = ProductAttributeService.normalize_dimension_text(product_text) if is_dimension_attr else ""
-
-#             for val in allowed_values:
-#                 # For dimension attributes, try exact normalized matching first
-#                 if is_dimension_attr:
-#                     # Normalize the allowed value from the list
-#                     normalized_val = ProductAttributeService.normalize_dimension_text(val)
-                    
-#                     # If we have both normalized values and they match exactly, give highest score
-#                     if normalized_val and normalized_product_text and normalized_val == normalized_product_text:
-#                         scores.append((val, 1.0))
-#                         continue
-                    
-#                     # Also check if the normalized value appears in the original product text
-#                     if normalized_val:
-#                         val_numbers = normalized_val.split('x')
-#                         text_lower = product_text.lower()
-#                         if all(num in text_lower for num in val_numbers):
-#                             idx1 = text_lower.find(val_numbers[0])
-#                             idx2 = text_lower.find(val_numbers[1])
-#                             if idx1 != -1 and idx2 != -1:
-#                                 distance = abs(idx2 - idx1)
-#                                 if distance < 20:
-#                                     scores.append((val, 0.95))
-#                                     continue
-                
-#                 # Standard semantic matching - USE EMBEDDING CACHE
-#                 contexts = [val, f"for {val}", f"use in {val}", f"suitable for {val}", f"{val} room"]
-#                 ctx_embs = [EmbeddingCache.get_embedding(c, model_embedder) for c in contexts]
-#                 sem_sim = max(float(util.cos_sim(pt_emb, ce).item()) for ce in ctx_embs)
-
-#                 lex_score = ProductAttributeService._lexical_evidence(product_text, val)
-#                 final_score = sem_weight * sem_sim + lex_weight * lex_score
-#                 scores.append((val, final_score))
-
-#             scores.sort(key=lambda x: x[1], reverse=True)
-#             best_val, best_score = scores[0]
-
-#             # Calculate adaptive margin if enabled
-#             effective_margin = margin
-#             if allow_multiple and use_adaptive_margin:
-#                 effective_margin = ProductAttributeService.get_adaptive_margin(scores, margin)
-
-#             # Special handling for dimension attributes with exact matches
-#             if is_dimension_attr and best_score >= 0.90:
-#                 source = ProductAttributeService.find_value_source(best_val, source_map, attr)
-#                 extracted[attr] = [{"value": best_val, "source": source}]
-#                 continue
-
-#             if not allow_multiple:
-#                 source = ProductAttributeService.find_value_source(best_val, source_map, attr)
-#                 extracted[attr] = [{"value": best_val, "source": source}]
-#             else:
-#                 candidates = [best_val]
-#                 use_base_threshold = best_score >= threshold_abs
-
-#                 # Get semantic clusters if enabled
-#                 clusters = []
-#                 if use_semantic_clustering:
-#                     clusters = ProductAttributeService.calculate_value_clusters(
-#                         allowed_values, scores, cluster_threshold=0.4
-#                     )
-#                     best_cluster = next((c for c in clusters if best_val in c), [best_val])
-
-#                 for val, sc in scores[1:]:
-#                     min_score = 0.4 if is_dimension_attr else 0.3
-#                     if sc < min_score:
-#                         continue
-                    
-#                     if use_dynamic_thresholds and extracted_attrs:
-#                         dynamic_thresh = ProductAttributeService.get_dynamic_threshold(
-#                             attr, val, sc, extracted_attrs, relationships,
-#                             mandatory_attrs, threshold_abs
-#                         )
-#                     else:
-#                         dynamic_thresh = threshold_abs
-
-#                     within_margin = (best_score - sc) <= effective_margin
-#                     above_threshold = sc >= dynamic_thresh
-
-#                     in_cluster = False
-#                     if use_semantic_clustering and clusters:
-#                         in_cluster = any(best_val in c and val in c for c in clusters)
-
-#                     if use_base_threshold:
-#                         if above_threshold and within_margin:
-#                             candidates.append(val)
-#                         elif in_cluster and within_margin:
-#                             candidates.append(val)
-#                     else:
-#                         if within_margin:
-#                             candidates.append(val)
-#                         elif in_cluster and (best_score - sc) <= effective_margin * 2.0:
-#                             candidates.append(val)
-
-#                 extracted[attr] = []
-#                 for candidate in candidates:
-#                     source = ProductAttributeService.find_value_source(candidate, source_map, attr)
-#                     extracted[attr].append({"value": candidate, "source": source})
-
-#         return extracted
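An invented single-value call, assuming the embedder ranks "Living Room" highest for this text (the intended behaviour); the output uses the array format described in the docstring:

extracted = ProductAttributeService.normalize_against_product_text(
    product_text="Modern canvas print for the living room",
    mandatory_attrs={"room_type": ["Living Room", "Bedroom", "Bathroom"]},
    source_map={"title": "Modern canvas print for the living room"},
)
# extracted -> {"room_type": [{"value": "Living Room", "source": "title"}]}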
-
-#     @staticmethod
-#     def extract_attributes(
-#         product_text: str,
-#         mandatory_attrs: Dict[str, List[str]],
-#         source_map: Dict[str, str] = None,
-#         model: str = None,
-#         extract_additional: bool = True,
-#         multiple: Optional[List[str]] = None,
-#         threshold_abs: float = 0.65,
-#         margin: float = 0.15,
-#         use_dynamic_thresholds: bool = True,
-#         use_adaptive_margin: bool = True,
-#         use_semantic_clustering: bool = True,
-#         use_cache: bool = True  # ⚡ NEW: Enable/disable caching
-#     ) -> dict:
-#         """
-#         Use Groq LLM to extract attributes from any product type with enhanced multi-value selection.
-#         Now returns values in array format: [{"value": "...", "source": "..."}]
-        
-#         ⚡ OPTIMIZED: Added caching layer for faster repeated requests
-#         """
-        
-#         if model is None:
-#             model = settings.SUPPORTED_MODELS[0]
-
-#         if multiple is None:
-#             multiple = []
-
-#         if source_map is None:
-#             source_map = {}
-
-#         # Check if product text is empty or minimal
-#         if not product_text or product_text == "No product information available":
-#             return ProductAttributeService._create_error_response(
-#                 "No product information provided",
-#                 mandatory_attrs,
-#                 extract_additional
-#             )
-
-#         # ⚡ CHECK CACHE FIRST
-#         if use_cache:
-#             cache_key = ProductAttributeService._generate_cache_key(product_text, mandatory_attrs)
-#             cached_result = SimpleCache.get(cache_key)
-#             if cached_result:
-#                 logger.info(f"✓ Cache hit - returning cached result")
-#                 return cached_result
-
-#         # Create structured prompt for mandatory attributes
-#         mandatory_attr_list = []
-#         for attr_name, allowed_values in mandatory_attrs.items():
-#             mandatory_attr_list.append(f"{attr_name}: {', '.join(allowed_values)}")
-#         mandatory_attr_text = "\n".join(mandatory_attr_list)
-
-#         additional_instruction = ""
-#         if extract_additional:
-#             additional_instruction = """
-# 2. Extract ADDITIONAL attributes: Identify any other relevant attributes from the product text 
-#    that are NOT in the mandatory list. Only include attributes where you can find actual values
-#    in the product text. Do NOT include attributes with "Not Specified" or empty values.
-   
-#    Examples of attributes to look for (only if present): Brand, Material, Size, Color, Dimensions,
-#    Weight, Features, Style, Theme, Pattern, Finish, Care Instructions, etc."""
-
-#         output_format = {
-#             "mandatory": {attr: "value or list of values" for attr in mandatory_attrs.keys()},
-#         }
-
-#         if extract_additional:
-#             output_format["additional"] = {
-#                 "example_attribute_1": "actual value found",
-#                 "example_attribute_2": "actual value found"
-#             }
-#             output_format["additional"]["_note"] = "Only include attributes with actual values found in text"
-
-#         prompt = f"""
-# You are an intelligent product attribute extractor that works with ANY product type.
-
-# TASK:
-# 1. Extract MANDATORY attributes: For each mandatory attribute, select the most appropriate value(s)
-#    from the provided list. Choose the value(s) that best match the product description.
-# {additional_instruction}
-
-# Product Text:
-# {product_text}
-
-# Mandatory Attribute Lists (MUST select from these allowed values):
-# {mandatory_attr_text}
-
-# CRITICAL INSTRUCTIONS:
-# - Return ONLY valid JSON, nothing else
-# - No explanations, no markdown, no text before or after the JSON
-# - For mandatory attributes, choose the value(s) from the provided list that best match
-# - If a mandatory attribute cannot be determined from the product text, use "Not Specified"
-# - Prefer exact matches from the allowed values list over generic synonyms
-# - If multiple values are plausible, you MAY return more than one
-# {f"- For additional attributes: ONLY include attributes where you found actual values in the product text. DO NOT include attributes with 'Not Specified', 'None', 'N/A', or empty values. If you cannot find a value for an attribute, simply don't include that attribute." if extract_additional else ""}
-# - Be precise and only extract information that is explicitly stated or clearly implied
-
-# Required Output Format:
-# {json.dumps(output_format, indent=2)}
-#         """
-
-#         payload = {
-#             "model": model,
-#             "messages": [
-#                 {
-#                     "role": "system",
-#                     "content": f"You are a precise attribute extraction model. Return ONLY valid JSON with {'mandatory and additional' if extract_additional else 'mandatory'} sections. No explanations, no markdown, no other text."
-#                 },
-#                 {"role": "user", "content": prompt}
-#             ],
-#             "temperature": 0.0,
-#             "max_tokens": 1500
-#         }
-
-#         headers = {
-#             "Authorization": f"Bearer {settings.GROQ_API_KEY}",
-#             "Content-Type": "application/json",
-#         }
-
-#         try:
-#             response = requests.post(
-#                 settings.GROQ_API_URL,
-#                 headers=headers,
-#                 json=payload,
-#                 timeout=30
-#             )
-#             response.raise_for_status()
-#             result_text = response.json()["choices"][0]["message"]["content"].strip()
-
-#             # Clean the response
-#             result_text = ProductAttributeService._clean_json_response(result_text)
-
-#             # Parse JSON
-#             parsed = json.loads(result_text)
-
-#             # Validate and restructure with source tracking
-#             parsed = ProductAttributeService._validate_response_structure(
-#                 parsed, mandatory_attrs, extract_additional, source_map
-#             )
-
-#             # Clean up and add source tracking to additional attributes in array format
-#             if extract_additional and "additional" in parsed:
-#                 cleaned_additional = {}
-#                 for k, v in parsed["additional"].items():
-#                     if v and v not in ["Not Specified", "None", "N/A", "", "not specified", "none", "n/a"]:
-#                         if not (isinstance(v, str) and v.lower() in ["not specified", "none", "n/a", ""]):
-#                             # Convert to array format if not already
-#                             if isinstance(v, list):
-#                                 cleaned_additional[k] = []
-#                                 for item in v:
-#                                     if isinstance(item, dict) and "value" in item:
-#                                         if "source" not in item:
-#                                             item["source"] = ProductAttributeService.find_value_source(
-#                                                 item["value"], source_map, k
-#                                             )
-#                                         cleaned_additional[k].append(item)
-#                                     else:
-#                                         source = ProductAttributeService.find_value_source(str(item), source_map, k)
-#                                         cleaned_additional[k].append({"value": str(item), "source": source})
-#                             else:
-#                                 source = ProductAttributeService.find_value_source(str(v), source_map, k)
-#                                 cleaned_additional[k] = [{"value": str(v), "source": source}]
-#                 parsed["additional"] = cleaned_additional
-
-#             # Calculate attribute relationships if using dynamic thresholds
-#             relationships = {}
-#             if use_dynamic_thresholds:
-#                 relationships = ProductAttributeService.calculate_attribute_relationships(
-#                     mandatory_attrs, product_text
-#                 )
-
-#             # Process attributes in order, allowing earlier ones to influence later ones
-#             extracted_so_far = {}
-#             for attr in mandatory_attrs.keys():
-#                 allow_multiple = attr in multiple
-
-#                 result = ProductAttributeService.normalize_against_product_text(
-#                     product_text=product_text,
-#                     mandatory_attrs={attr: mandatory_attrs[attr]},
-#                     source_map=source_map,
-#                     threshold_abs=threshold_abs,
-#                     margin=margin,
-#                     allow_multiple=allow_multiple,
-#                     extracted_attrs=extracted_so_far,
-#                     relationships=relationships,
-#                     use_dynamic_thresholds=use_dynamic_thresholds,
-#                     use_adaptive_margin=use_adaptive_margin,
-#                     use_semantic_clustering=use_semantic_clustering
-#                 )
-
-#                 # Result is already in array format from normalize_against_product_text
-#                 parsed["mandatory"][attr] = result[attr]
-#                 extracted_so_far[attr] = result[attr]
-
-#             # ⚡ CACHE THE RESULT
-#             if use_cache:
-#                 SimpleCache.set(cache_key, parsed)
-#                 logger.info(f"✓ Cached extraction result")
-
-#             return parsed
-
-#         except requests.exceptions.RequestException as e:
-#             logger.error(f"Request exception: {str(e)}")
-#             return ProductAttributeService._create_error_response(
-#                 str(e), mandatory_attrs, extract_additional
-#             )
-#         except json.JSONDecodeError as e:
-#             logger.error(f"JSON decode error: {str(e)}")
-#             return ProductAttributeService._create_error_response(
-#                 f"Invalid JSON: {str(e)}", mandatory_attrs, extract_additional, result_text
-#             )
-#         except Exception as e:
-#             logger.error(f"Unexpected error: {str(e)}")
-#             return ProductAttributeService._create_error_response(
-#                 str(e), mandatory_attrs, extract_additional
-#             )
-
-#     @staticmethod
-#     def extract_attributes_batch(
-#         products: List[Dict],
-#         mandatory_attrs: Dict[str, List[str]],
-#         model: str = None,
-#         extract_additional: bool = True,
-#         process_image: bool = True,
-#         max_workers: int = 5,
-#         multiple: Optional[List[str]] = None,
-#         threshold_abs: float = 0.65,
-#         margin: float = 0.15,
-#         use_dynamic_thresholds: bool = True,
-#         use_adaptive_margin: bool = True,
-#         use_semantic_clustering: bool = True,
-#         use_cache: bool = True  # ⚡ NEW: Enable caching for batch processing
-#     ) -> Dict:
-#         """
-#         Extract attributes for multiple products in parallel with enhanced multi-value selection and source tracking.
-#         ⚡ OPTIMIZED: Added caching support for batch operations
-#         """
-#         results = []
-#         successful = 0
-#         failed = 0
-        
-#         ocr_service = OCRService()
-
-#         if multiple is None:
-#             multiple = []
-
-#         def process_product(product_data):
-#             """Process a single product."""
-#             product_id = product_data.get('product_id', f"product_{len(results)}")
-            
-#             try:
-#                 # Process image if URL is provided
-#                 ocr_results = None
-#                 ocr_text = None
-                
-#                 if process_image and product_data.get('image_url'):
-#                     ocr_results = ocr_service.process_image(product_data['image_url'])
-                    
-#                     # Extract attributes from OCR
-#                     if ocr_results and ocr_results.get('detected_text'):
-#                         ocr_attrs = ProductAttributeService.extract_attributes_from_ocr(
-#                             ocr_results, model
-#                         )
-#                         ocr_results['extracted_attributes'] = ocr_attrs
-                        
-#                         # Format OCR text for combining with product text
-#                         ocr_text = "\n".join([
-#                             f"{item['text']} (confidence: {item['confidence']:.2f})"
-#                             for item in ocr_results['detected_text']
-#                         ])
-                
-#                 # Combine all product information with source tracking
-#                 product_text, source_map = ProductAttributeService.combine_product_text(
-#                     title=product_data.get('title'),
-#                     short_desc=product_data.get('short_desc'),
-#                     long_desc=product_data.get('long_desc'),
-#                     ocr_text=ocr_text
-#                 )
-                
-#                 # Extract attributes from combined text with enhanced features
-#                 result = ProductAttributeService.extract_attributes(
-#                     product_text=product_text,
-#                     mandatory_attrs=mandatory_attrs,
-#                     source_map=source_map,
-#                     model=model,
-#                     extract_additional=extract_additional,
-#                     multiple=multiple,
-#                     threshold_abs=threshold_abs,
-#                     margin=margin,
-#                     use_dynamic_thresholds=use_dynamic_thresholds,
-#                     use_adaptive_margin=use_adaptive_margin,
-#                     use_semantic_clustering=use_semantic_clustering,
-#                     use_cache=use_cache  # ⚡ Pass cache flag
-#                 )
-                
-#                 result['product_id'] = product_id
-                
-#                 # Add OCR results if available (already in correct format)
-#                 if ocr_results:
-#                     result['ocr_results'] = ocr_results
-                
-#                 # Check if extraction was successful
-#                 if 'error' not in result:
-#                     return result, True
-#                 else:
-#                     return result, False
-                    
-#             except Exception as e:
-#                 logger.error(f"Error processing product {product_id}: {str(e)}")
-#                 return {
-#                     'product_id': product_id,
-#                     'mandatory': {attr: [{"value": "Not Specified", "source": "error"}] for attr in mandatory_attrs.keys()},
-#                     'additional': {} if extract_additional else None,
-#                     'error': f"Processing error: {str(e)}"
-#                 }, False
-
-#         # Process products in parallel
-#         with ThreadPoolExecutor(max_workers=max_workers) as executor:
-#             future_to_product = {
-#                 executor.submit(process_product, product): product 
-#                 for product in products
-#             }
-            
-#             for future in as_completed(future_to_product):
-#                 try:
-#                     result, success = future.result()
-#                     results.append(result)
-#                     if success:
-#                         successful += 1
-#                     else:
-#                         failed += 1
-#                 except Exception as e:
-#                     logger.error(f"Future execution error: {str(e)}")
-#                     failed += 1
-#                     results.append({
-#                         'product_id': 'unknown',
-#                         'mandatory': {attr: [{"value": "Not Specified", "source": "error"}] for attr in mandatory_attrs.keys()},
-#                         'additional': {} if extract_additional else None,
-#                         'error': f"Unexpected error: {str(e)}"
-#                     })
-
-#         return {
-#             'results': results,
-#             'total_products': len(products),
-#             'successful': successful,
-#             'failed': failed,
-#             'cache_stats': SimpleCache.get_stats(),  # ⚡ Include cache statistics
-#             'embedding_cache_stats': EmbeddingCache.get_stats()  # ⚡ Include embedding cache stats
-#         }
-
-#     @staticmethod
-#     def _clean_json_response(text: str) -> str:
-#         """Clean LLM response to extract valid JSON."""
-#         start_idx = text.find('{')
-#         end_idx = text.rfind('}')
-
-#         if start_idx != -1 and end_idx != -1:
-#             text = text[start_idx:end_idx + 1]
-
-#         if "```json" in text:
-#             text = text.split("```json")[1].split("```")[0].strip()
-#         elif "```" in text:
-#             text = text.split("```")[1].split("```")[0].strip()
-#             if text.startswith("json"):
-#                 text = text[4:].strip()
-
-#         return text
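Example cleanup (input invented). Because the brace trim runs first, the fence-stripping branches below it only fire when the reply contains no braces at all:

raw = 'Sure, here is the JSON:\n```json\n{"mandatory": {"style": "Modern"}}\n```'
# _clean_json_response(raw) -> '{"mandatory": {"style": "Modern"}}'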
-
-#     @staticmethod
-#     def _validate_response_structure(
-#         parsed: dict,
-#         mandatory_attrs: Dict[str, List[str]],
-#         extract_additional: bool,
-#         source_map: Dict[str, str] = None
-#     ) -> dict:
-#         """Validate and fix the response structure, ensuring array format with source tracking."""
-#         if source_map is None:
-#             source_map = {}
-        
-#         expected_sections = ["mandatory"]
-#         if extract_additional:
-#             expected_sections.append("additional")
-
-#         if not all(section in parsed for section in expected_sections):
-#             if isinstance(parsed, dict):
-#                 mandatory_keys = set(mandatory_attrs.keys())
-#                 mandatory = {k: v for k, v in parsed.items() if k in mandatory_keys}
-#                 additional = {k: v for k, v in parsed.items() if k not in mandatory_keys}
-
-#                 result = {"mandatory": mandatory}
-#                 if extract_additional:
-#                     result["additional"] = additional
-#                 parsed = result
-#             else:
-#                 return ProductAttributeService._create_error_response(
-#                     "Invalid response structure",
-#                     mandatory_attrs,
-#                     extract_additional,
-#                     str(parsed)
-#                 )
-
-#         # Convert mandatory attributes to array format with source tracking
-#         if "mandatory" in parsed:
-#             converted_mandatory = {}
-#             for attr, value in parsed["mandatory"].items():
-#                 if isinstance(value, list):
-#                     # Already in array format, ensure each item has source
-#                     converted_mandatory[attr] = []
-#                     for item in value:
-#                         if isinstance(item, dict) and "value" in item:
-#                             # Already has proper structure
-#                             if "source" not in item:
-#                                 item["source"] = ProductAttributeService.find_value_source(
-#                                     item["value"], source_map, attr
-#                                 )
-#                             converted_mandatory[attr].append(item)
-#                         else:
-#                             # Convert string to proper format
-#                             source = ProductAttributeService.find_value_source(str(item), source_map, attr)
-#                             converted_mandatory[attr].append({"value": str(item), "source": source})
-#                 else:
-#                     # Single value - convert to array format
-#                     source = ProductAttributeService.find_value_source(str(value), source_map, attr)
-#                     converted_mandatory[attr] = [{"value": str(value), "source": source}]
-            
-#             parsed["mandatory"] = converted_mandatory
-
-#         return parsed
-
-#     @staticmethod
-#     def _create_error_response(
-#         error: str,
-#         mandatory_attrs: Dict[str, List[str]],
-#         extract_additional: bool,
-#         raw_output: Optional[str] = None
-#     ) -> dict:
-#         """Create a standardized error response in array format."""
-#         response = {
-#             "mandatory": {attr: [{"value": "Not Specified", "source": "error"}] for attr in mandatory_attrs.keys()},
-#             "error": error
-#         }
-#         if extract_additional:
-#             response["additional"] = {}
-#         if raw_output:
-#             response["raw_output"] = raw_output
-#         return response
-
-#     @staticmethod
-#     def get_cache_stats() -> Dict:
-#         """
-#         Get statistics for both caches.
-#         ⚡ NEW: Utility method to monitor cache performance
-#         """
-#         return {
-#             "simple_cache": SimpleCache.get_stats(),
-#             "embedding_cache": EmbeddingCache.get_stats()
-#         }
-
-#     @staticmethod
-#     def clear_all_caches():
-#         """
-#         Clear both caches.
-#         ⚡ NEW: Utility method to reset caches when needed
-#         """
-#         SimpleCache.clear()
-#         EmbeddingCache.clear()
-#         logger.info("All caches cleared")
-
 
 # ==================== services.py (PERFORMANCE OPTIMIZED) ====================
 import requests

+ 59 - 516
attr_extraction/views.py

@@ -1,40 +1,77 @@
+# ==================== Cleaned & Optimized Imports ====================
+
+import os
+import io
+import json
+import logging
+import threading
+from datetime import datetime
+import pandas as pd
+import concurrent.futures
+
+from django.conf import settings
+from django.http import HttpResponse
+from django.db import transaction
+from django.db.models import Prefetch
+
 from rest_framework.views import APIView
 from rest_framework.response import Response
 from rest_framework import status
 from rest_framework.parsers import MultiPartParser, FormParser
-from django.db import transaction
-import pandas as pd
-from .models import Product, ProductType, ProductAttribute, AttributePossibleValue
+
+from openpyxl import Workbook
+from openpyxl.styles import Font, PatternFill, Alignment
+
+# --- Local imports ---
+from .models import (
+    Product,
+    ProductType,
+    ProductAttribute,
+    ProductAttributeValue,
+    AttributePossibleValue,
+)
 from .serializers import (
-    SingleProductRequestSerializer,
-    BatchProductRequestSerializer,
-    ProductAttributeResultSerializer, 
-    BatchProductResponseSerializer,
     ProductSerializer,
     ProductTypeSerializer,
     ProductAttributeSerializer,
-    AttributePossibleValueSerializer
+    AttributePossibleValueSerializer,
+    SingleProductRequestSerializer,
+    BatchProductRequestSerializer,
+    ProductAttributeResultSerializer,
+    BatchProductResponseSerializer,
+    ProductAttributeValueSerializer,
+    ProductAttributeValueInputSerializer,
+    BulkProductAttributeValueSerializer,
+    ProductWithAttributesSerializer,
 )
 from .services import ProductAttributeService
 from .ocr_service import OCRService
+from .visual_processing_service import VisualProcessingService
 
+# --- Configuration for Generated Outputs Folder ---
+OUTPUT_FOLDER_NAME = 'generated_outputs'
+OUTPUT_ROOT = os.path.join(settings.MEDIA_ROOT, OUTPUT_FOLDER_NAME)
+OUTPUT_URL = os.path.join(settings.MEDIA_URL, OUTPUT_FOLDER_NAME).replace('\\', '/')  # Ensure forward slashes
 
+LOG_FILE_NAME = 'excel_generation.log'
+STATUS_FILE_NAME = 'excel_generation_status.json'
+EXCEL_FILE_NAME = 'generated_products.xlsx'
 
-# Sample test images (publicly available)
-SAMPLE_IMAGES = {
-    "tshirt": "https://images.unsplash.com/photo-1521572163474-6864f9cf17ab",
-    "dress": "https://images.unsplash.com/photo-1595777457583-95e059d581b8",
-    "jeans": "https://images.unsplash.com/photo-1542272604-787c3835535d"
-}
+LOG_FILE_PATH = os.path.join(OUTPUT_ROOT, LOG_FILE_NAME)
+STATUS_FILE_PATH = os.path.join(OUTPUT_ROOT, STATUS_FILE_NAME)
+EXCEL_FILE_PATH = os.path.join(OUTPUT_ROOT, EXCEL_FILE_NAME)
 
-# ==================== Updated views.py ====================
-from rest_framework.views import APIView
-from rest_framework.response import Response
-from rest_framework import status
-from .models import Product
-from .services import ProductAttributeService
-from .ocr_service import OCRService
-from .visual_processing_service import VisualProcessingService
+# Ensure the output folder exists
+os.makedirs(OUTPUT_ROOT, exist_ok=True)
+
+# Configure logging
+logging.basicConfig(
+    filename=LOG_FILE_PATH,
+    level=logging.INFO,
+    format='%(asctime)s - %(levelname)s - %(message)s'
+)
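+# NOTE: basicConfig is process-wide and is silently ignored when the root
+# logger already has handlers (e.g. if the project's LOGGING setting
+# configures the root logger), so the file handler above only takes effect
+# in an otherwise-unconfigured process.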
+logger = logging.getLogger(__name__)
 
 
 class ExtractProductAttributesView(APIView):
@@ -131,398 +168,7 @@ class ExtractProductAttributesView(APIView):
 
         return Response(result, status=status.HTTP_200_OK)
 
-
-
-# Replace the BatchExtractProductAttributesView in your views.py with this updated version
-
-# class BatchExtractProductAttributesView(APIView):
-#     """
-#     API endpoint to extract product attributes for multiple products in batch.
-#     Uses item-specific mandatory_attrs with source tracking.
-#     Returns attributes in array format with original_value field.
-#     Includes OCR and Visual Processing results.
-#     """
-
-#     def post(self, request):
-#         serializer = BatchProductRequestSerializer(data=request.data)
-#         if not serializer.is_valid():
-#             return Response({"error": serializer.errors}, status=status.HTTP_400_BAD_REQUEST)
-
-#         validated_data = serializer.validated_data
-        
-#         # Get batch-level settings
-#         product_list = validated_data.get("products", [])
-#         model = validated_data.get("model")
-#         extract_additional = validated_data.get("extract_additional", True)
-#         process_image = validated_data.get("process_image", True)
-#         multiple = validated_data.get("multiple", [])
-#         threshold_abs = validated_data.get("threshold_abs", 0.65)
-#         margin = validated_data.get("margin", 0.15)
-#         use_dynamic_thresholds = validated_data.get("use_dynamic_thresholds", True)
-#         use_adaptive_margin = validated_data.get("use_adaptive_margin", True)
-#         use_semantic_clustering = validated_data.get("use_semantic_clustering", True)
-        
-#         # Extract all item_ids to query the database efficiently
-#         item_ids = [p['item_id'] for p in product_list] 
-        
-#         # Fetch all products in one query
-#         products_queryset = Product.objects.filter(item_id__in=item_ids)
-#         product_map = {product.item_id: product for product in products_queryset}
-#         found_ids = set(product_map.keys())
-        
-#         # Fetch all original attribute values for these products in one query
-#         original_values_qs = ProductAttributeValue.objects.filter(
-#             product__item_id__in=item_ids
-#         ).select_related('product')
-        
-#         # Create a nested dictionary: {item_id: {attribute_name: original_value}}
-#         original_values_map = {}
-#         for attr_val in original_values_qs:
-#             item_id = attr_val.product.item_id
-#             if item_id not in original_values_map:
-#                 original_values_map[item_id] = {}
-#             original_values_map[item_id][attr_val.attribute_name] = attr_val.original_value
-        
-#         results = []
-#         successful = 0
-#         failed = 0
-
-#         for product_entry in product_list:
-#             item_id = product_entry['item_id']
-#             mandatory_attrs = product_entry['mandatory_attrs'] 
-
-#             if item_id not in found_ids:
-#                 failed += 1
-#                 results.append({
-#                     "product_id": item_id,
-#                     "error": "Product not found in database"
-#                 })
-#                 continue
-
-#             product = product_map[item_id]
-            
-#             try: 
-#                 title = product.product_name
-#                 short_desc = product.product_short_description
-#                 long_desc = product.product_long_description
-#                 image_url = product.image_path
-                
-#                 ocr_results = None
-#                 ocr_text = None
-#                 visual_results = None
-
-#                 # Image Processing Logic
-#                 if process_image and image_url:
-#                     # OCR Processing
-#                     ocr_service = OCRService()
-#                     ocr_results = ocr_service.process_image(image_url)
-                    
-#                     if ocr_results and ocr_results.get("detected_text"):
-#                         ocr_attrs = ProductAttributeService.extract_attributes_from_ocr(
-#                             ocr_results, model
-#                         )
-#                         ocr_results["extracted_attributes"] = ocr_attrs
-#                         ocr_text = "\n".join([
-#                             f"{item['text']} (confidence: {item['confidence']:.2f})"
-#                             for item in ocr_results["detected_text"]
-#                         ])
-                    
-#                     # Visual Processing
-#                     visual_service = VisualProcessingService()
-#                     product_type_hint = product.product_type if hasattr(product, 'product_type') else None
-#                     visual_results = visual_service.process_image(image_url, product_type_hint)
-                    
-#                     # Format visual attributes to array format with source tracking
-#                     if visual_results and visual_results.get('visual_attributes'):
-#                         visual_results['visual_attributes'] = ProductAttributeService.format_visual_attributes(
-#                             visual_results['visual_attributes']
-#                         )
-
-#                 # Combine product text with source tracking
-#                 product_text, source_map = ProductAttributeService.combine_product_text(
-#                     title=title,
-#                     short_desc=short_desc,
-#                     long_desc=long_desc,
-#                     ocr_text=ocr_text
-#                 )
-
-#                 # Attribute Extraction with source tracking (returns array format)
-#                 extracted = ProductAttributeService.extract_attributes(
-#                     product_text=product_text,
-#                     mandatory_attrs=mandatory_attrs,
-#                     source_map=source_map,
-#                     model=model,
-#                     extract_additional=extract_additional,
-#                     multiple=multiple,
-#                     threshold_abs=threshold_abs,
-#                     margin=margin,
-#                     use_dynamic_thresholds=use_dynamic_thresholds,
-#                     use_adaptive_margin=use_adaptive_margin,
-#                     use_semantic_clustering=use_semantic_clustering
-#                 )
-
-#                 # Add original_value to each extracted attribute
-#                 original_attrs = original_values_map.get(item_id, {})
-                
-#                 # Process mandatory attributes
-#                 for attr_name, attr_values in extracted.get("mandatory", {}).items():
-#                     if isinstance(attr_values, list):
-#                         for attr_obj in attr_values:
-#                             if isinstance(attr_obj, dict):
-#                                 # Add original_value if it exists
-#                                 attr_obj["original_value"] = original_attrs.get(attr_name, "")
-                
-#                 # Process additional attributes
-#                 for attr_name, attr_values in extracted.get("additional", {}).items():
-#                     if isinstance(attr_values, list):
-#                         for attr_obj in attr_values:
-#                             if isinstance(attr_obj, dict):
-#                                 # Add original_value if it exists
-#                                 attr_obj["original_value"] = original_attrs.get(attr_name, "")
-
-#                 result = {
-#                     "product_id": product.item_id,
-#                     "mandatory": extracted.get("mandatory", {}),
-#                     "additional": extracted.get("additional", {}),
-#                 }
-
-#                 # Attach OCR results if available
-#                 if ocr_results:
-#                     result["ocr_results"] = ocr_results
-                
-#                 # Attach Visual Processing results if available
-#                 if visual_results:
-#                     result["visual_results"] = visual_results
-
-#                 results.append(result)
-#                 successful += 1
-
-#             except Exception as e:
-#                 failed += 1
-#                 results.append({
-#                     "product_id": item_id,
-#                     "error": str(e)
-#                 })
-
-#         batch_result = {
-#             "results": results,
-#             "total_products": len(product_list),
-#             "successful": successful,
-#             "failed": failed
-#         }
-
-#         response_serializer = BatchProductResponseSerializer(data=batch_result)
-#         if response_serializer.is_valid():
-#             return Response(response_serializer.data, status=status.HTTP_200_OK)
-
-#         return Response(batch_result, status=status.HTTP_200_OK)
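
This batch view and the optimized variant below forward the same matching knobs (threshold_abs, margin, and the use_* flags) to ProductAttributeService.extract_attributes. As a rough illustration of how an absolute similarity floor and a relative margin typically combine (a sketch of the general technique, not the service's actual selection code), a candidate survives only if it clears threshold_abs and sits within margin of the best match:

from sentence_transformers import SentenceTransformer, util

embedder = SentenceTransformer("all-MiniLM-L6-v2")

def select_candidates(value, allowed_values, threshold_abs=0.65, margin=0.15):
    # Absolute floor: drop anything below threshold_abs.
    # Relative cut: drop anything more than `margin` below the best score.
    value_emb = embedder.encode(value, convert_to_tensor=True)
    allowed_embs = embedder.encode(allowed_values, convert_to_tensor=True)
    sims = util.cos_sim(value_emb, allowed_embs)[0]
    best = float(sims.max())
    return [
        (allowed_values[i], float(s))
        for i, s in enumerate(sims)
        if float(s) >= threshold_abs and best - float(s) <= margin
    ]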
-
-
-# views.py - OPTIMIZED WITHOUT REDIS/CELERY
-
-# class BatchExtractProductAttributesView(APIView):
-#     """
-#     Optimized batch extraction using ThreadPoolExecutor (Python standard library)
-#     """
-
-#     def post(self, request):
-#         serializer = BatchProductRequestSerializer(data=request.data)
-#         if not serializer.is_valid():
-#             return Response({"error": serializer.errors}, status=status.HTTP_400_BAD_REQUEST)
-
-#         validated_data = serializer.validated_data
-#         product_list = validated_data.get("products", [])
-        
-#         # OPTIMIZATION 1: Single optimized database query
-#         item_ids = [p['item_id'] for p in product_list]
-#         products_queryset = Product.objects.filter(
-#             item_id__in=item_ids
-#         ).prefetch_related('attribute_values')  # Single query!
-        
-#         product_map = {product.item_id: product for product in products_queryset}
-        
-#         # OPTIMIZATION 2: Prefetch ALL original attribute values in ONE query
-#         original_values_qs = ProductAttributeValue.objects.filter(
-#             product__item_id__in=item_ids
-#         ).select_related('product')
-        
-#         original_values_map = {}
-#         for attr_val in original_values_qs:
-#             item_id = attr_val.product.item_id
-#             if item_id not in original_values_map:
-#                 original_values_map[item_id] = {}
-#             original_values_map[item_id][attr_val.attribute_name] = attr_val.original_value
-        
-#         # Extract settings
-#         model = validated_data.get("model")
-#         extract_additional = validated_data.get("extract_additional", True)
-#         process_image = validated_data.get("process_image", True)
-#         multiple = validated_data.get("multiple", [])
-#         threshold_abs = validated_data.get("threshold_abs", 0.65)
-#         margin = validated_data.get("margin", 0.15)
-#         use_dynamic_thresholds = validated_data.get("use_dynamic_thresholds", True)
-#         use_adaptive_margin = validated_data.get("use_adaptive_margin", True)
-#         use_semantic_clustering = validated_data.get("use_semantic_clustering", True)
-        
-#         results = []
-#         successful = 0
-#         failed = 0
-        
-#         # OPTIMIZATION 3: Initialize services once
-#         ocr_service = OCRService() if process_image else None
-#         visual_service = VisualProcessingService() if process_image else None
-
-#         # OPTIMIZATION 4: Process in parallel using ThreadPoolExecutor
-#         def process_single_product(product_entry):
-#             """Process a single product (runs in parallel)"""
-#             item_id = product_entry['item_id']
-#             mandatory_attrs = product_entry['mandatory_attrs']
-
-#             if item_id not in product_map:
-#                 return {
-#                     "product_id": item_id,
-#                     "error": "Product not found in database"
-#                 }, False
-
-#             product = product_map[item_id]
-            
-#             try:
-#                 title = product.product_name
-#                 short_desc = product.product_short_description
-#                 long_desc = product.product_long_description
-#                 image_url = product.image_path
-                
-#                 ocr_results = None
-#                 ocr_text = None
-#                 visual_results = None
-
-#                 # Image processing (if enabled)
-#                 if process_image and image_url:
-#                     if ocr_service:
-#                         ocr_results = ocr_service.process_image(image_url)
-                        
-#                         if ocr_results and ocr_results.get("detected_text"):
-#                             ocr_attrs = ProductAttributeService.extract_attributes_from_ocr(
-#                                 ocr_results, model
-#                             )
-#                             ocr_results["extracted_attributes"] = ocr_attrs
-#                             ocr_text = "\n".join([
-#                                 f"{item['text']} (confidence: {item['confidence']:.2f})"
-#                                 for item in ocr_results["detected_text"]
-#                             ])
-                    
-#                     if visual_service:
-#                         product_type_hint = product.product_type if hasattr(product, 'product_type') else None
-#                         visual_results = visual_service.process_image(image_url, product_type_hint)
-                        
-#                         if visual_results and visual_results.get('visual_attributes'):
-#                             visual_results['visual_attributes'] = ProductAttributeService.format_visual_attributes(
-#                                 visual_results['visual_attributes']
-#                             )
-
-#                 # Combine product text with source tracking
-#                 product_text, source_map = ProductAttributeService.combine_product_text(
-#                     title=title,
-#                     short_desc=short_desc,
-#                     long_desc=long_desc,
-#                     ocr_text=ocr_text
-#                 )
-
-#                 # Extract attributes (WITH CACHING ENABLED)
-#                 extracted = ProductAttributeService.extract_attributes(
-#                     product_text=product_text,
-#                     mandatory_attrs=mandatory_attrs,
-#                     source_map=source_map,
-#                     model=model,
-#                     extract_additional=extract_additional,
-#                     multiple=multiple,
-#                     threshold_abs=threshold_abs,
-#                     margin=margin,
-#                     use_dynamic_thresholds=use_dynamic_thresholds,
-#                     use_adaptive_margin=use_adaptive_margin,
-#                     use_semantic_clustering=use_semantic_clustering,
-#                     use_cache=True  # Enable caching!
-#                 )
-
-#                 # Add original values
-#                 original_attrs = original_values_map.get(item_id, {})
-                
-#                 for attr_name, attr_values in extracted.get("mandatory", {}).items():
-#                     if isinstance(attr_values, list):
-#                         for attr_obj in attr_values:
-#                             if isinstance(attr_obj, dict):
-#                                 attr_obj["original_value"] = original_attrs.get(attr_name, "")
-                
-#                 for attr_name, attr_values in extracted.get("additional", {}).items():
-#                     if isinstance(attr_values, list):
-#                         for attr_obj in attr_values:
-#                             if isinstance(attr_obj, dict):
-#                                 attr_obj["original_value"] = original_attrs.get(attr_name, "")
-
-#                 result = {
-#                     "product_id": product.item_id,
-#                     "mandatory": extracted.get("mandatory", {}),
-#                     "additional": extracted.get("additional", {}),
-#                 }
-
-#                 if ocr_results:
-#                     result["ocr_results"] = ocr_results
-                
-#                 if visual_results:
-#                     result["visual_results"] = visual_results
-
-#                 return result, True
-
-#             except Exception as e:
-#                 return {
-#                     "product_id": item_id,
-#                     "error": str(e)
-#                 }, False
-
-#         # OPTIMIZATION 5: Use ThreadPoolExecutor for parallel processing
-#         import concurrent.futures
-#         max_workers = min(10, len(product_list))  # Up to 10 parallel workers
-        
-#         with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
-#             # Submit all tasks
-#             future_to_product = {
-#                 executor.submit(process_single_product, product): product
-#                 for product in product_list
-#             }
-            
-#             # Collect results as they complete
-#             for future in concurrent.futures.as_completed(future_to_product):
-#                 try:
-#                     result, success = future.result()
-#                     results.append(result)
-#                     if success:
-#                         successful += 1
-#                     else:
-#                         failed += 1
-#                 except Exception as e:
-#                     failed += 1
-#                     # Recover the failing product from the future map instead
-#                     # of reporting an anonymous "unknown" product_id
-#                     product_entry = future_to_product[future]
-#                     logger.error(f"Unexpected error for {product_entry['item_id']}: {str(e)}")
-#                     results.append({
-#                         "product_id": product_entry['item_id'],
-#                         "error": str(e)
-#                     })
-
-#         batch_result = {
-#             "results": results,
-#             "total_products": len(product_list),
-#             "successful": successful,
-#             "failed": failed
-#         }
-
-#         response_serializer = BatchProductResponseSerializer(data=batch_result)
-#         if response_serializer.is_valid():
-#             return Response(response_serializer.data, status=status.HTTP_200_OK)
-
-#         return Response(batch_result, status=status.HTTP_200_OK)
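
Note that collecting futures with as_completed() means results is filled in completion order, not request order. If the response must mirror the incoming product_list, executor.map is a drop-in alternative that preserves ordering (a sketch, assuming process_single_product keeps the (result, success) return shape used above):

from concurrent.futures import ThreadPoolExecutor

with ThreadPoolExecutor(max_workers=max_workers) as executor:
    # map() yields results in submission order, one per input product
    for result, success in executor.map(process_single_product, product_list):
        results.append(result)
        if success:
            successful += 1
        else:
            failed += 1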
-
-
 # ==================== OPTIMIZED BATCH VIEW ====================
-import concurrent.futures
 
 class BatchExtractProductAttributesView(APIView):
     """
@@ -773,7 +419,6 @@ class BatchExtractProductAttributesView(APIView):
         return Response(batch_result, status=status.HTTP_200_OK)
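
For reference, a request body accepted by this view looks roughly like the payload below. The field names come from the serializer reads shown above; the route, the model id, and the exact shape of mandatory_attrs are placeholders/assumptions:

payload = {
    "products": [
        {"item_id": "SKU-123", "mandatory_attrs": {"Color": ["Black", "White"]}},
    ],
    "model": "llama-3.1-8b-instant",   # placeholder model id
    "extract_additional": True,
    "process_image": True,
    "multiple": ["Color"],
    "threshold_abs": 0.65,
    "margin": 0.15,
    "use_dynamic_thresholds": True,
    "use_adaptive_margin": True,
    "use_semantic_clustering": True,
}
# e.g. requests.post("http://localhost:8000/api/extract/batch/", json=payload)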
 
 
-
 class ProductListView(APIView):
     """
     GET API to list all products with details
@@ -783,80 +428,6 @@ class ProductListView(APIView):
         serializer = ProductSerializer(products, many=True)
         return Response(serializer.data, status=status.HTTP_200_OK)
 
-
-from rest_framework.views import APIView
-from rest_framework.response import Response
-from rest_framework import status
-from rest_framework.parsers import MultiPartParser, FormParser
-import pandas as pd
-from .models import Product
-
-
-from rest_framework.views import APIView
-from rest_framework.response import Response
-from rest_framework import status
-from rest_framework.parsers import MultiPartParser, FormParser
-from django.db import transaction
-import pandas as pd
-from .models import Product, ProductAttributeValue
-
-import logging
-import json
-from rest_framework.views import APIView
-from rest_framework.response import Response
-from rest_framework import status
-from rest_framework.parsers import MultiPartParser, FormParser
-from django.db import transaction
-from django.db.models import Prefetch
-import pandas as pd
-# Import ALL your models
-from .models import Product, ProductAttributeValue, ProductType, ProductAttribute, AttributePossibleValue
-from .services import ProductAttributeService
-from .ocr_service import OCRService
-from .visual_processing_service import VisualProcessingService
-from openpyxl import Workbook
-from openpyxl.styles import Font, PatternFill, Alignment
-from django.conf import settings
-import os
-import threading
-from datetime import datetime
-
-# --- Configuration for Generated Outputs Folder ---
-OUTPUT_FOLDER_NAME = 'generated_outputs'
-OUTPUT_ROOT = os.path.join(settings.MEDIA_ROOT, OUTPUT_FOLDER_NAME)
-OUTPUT_URL = os.path.join(settings.MEDIA_URL, OUTPUT_FOLDER_NAME).replace('\\', '/') # Use forward slashes for URL
-
-# Define log and status file paths within the new subfolder
-LOG_FILE_NAME = 'excel_generation.log'
-STATUS_FILE_NAME = 'excel_generation_status.json'
-EXCEL_FILE_NAME = 'generated_products.xlsx'
-
-LOG_FILE_PATH = os.path.join(OUTPUT_ROOT, LOG_FILE_NAME)
-STATUS_FILE_PATH = os.path.join(OUTPUT_ROOT, STATUS_FILE_NAME)
-EXCEL_FILE_PATH = os.path.join(OUTPUT_ROOT, EXCEL_FILE_NAME)
-
-# Ensure the OUTPUT_ROOT exists for files to be saved
-if not os.path.exists(OUTPUT_ROOT):
-    os.makedirs(OUTPUT_ROOT)
-
-# Configure basic logging to the new path
-logging.basicConfig(
-    filename=LOG_FILE_PATH,
-    level=logging.INFO,
-    format='%(asctime)s - %(levelname)s - %(message)s'
-)
-logger = logging.getLogger(__name__)
-
 # -------------------------------------------------------------------------------------------------
 
 def generate_product_excel_background():
@@ -1199,7 +770,6 @@ def generate_product_excel_background():
         logger.exception("CRITICAL ERROR during background Excel generation process.")
         write_status("FAILED", error_msg=str(e))
 
-
 # -------------------------------------------------------------------------------------------------
 
 class ProductUploadExcelView(APIView):
@@ -1381,15 +951,6 @@ class ProductUploadExcelView(APIView):
             return Response({'error': f'An unexpected error occurred while processing the file: {str(e)}'}, status=status.HTTP_500_INTERNAL_SERVER_ERROR)
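
ProductUploadExcelView accepts a multipart upload (MultiPartParser) and reads it with pandas, per the imports above. A minimal sketch of the parsing step, assuming one product per row; the column layout is illustrative:

import pandas as pd

def parse_product_rows(uploaded_file):
    df = pd.read_excel(uploaded_file, sheet_name=0)
    df = df.fillna("")                    # keep NaN out of text fields
    return df.to_dict(orient="records")   # one dict per product row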
 
 
-# Add this view to your views.py for downloading a template
-
-from django.http import HttpResponse
-from openpyxl import Workbook
-from openpyxl.styles import Font, PatternFill, Alignment
-from rest_framework.views import APIView
-import io
-
-
 class DownloadExcelTemplateView(APIView):
     """
     GET API to download an Excel template with two sheets:
@@ -1921,24 +1482,6 @@ class ProductTypeListView(APIView):
         return Response({"product_types": list(product_types)}, status=status.HTTP_200_OK)
     
 
-
-# Add these views to your views.py
-
-from rest_framework.views import APIView
-from rest_framework.response import Response
-from rest_framework import status
-from rest_framework.parsers import MultiPartParser, FormParser
-from django.db import transaction
-import pandas as pd
-from .models import Product, ProductAttributeValue
-from .serializers import (
-    ProductAttributeValueSerializer,
-    ProductAttributeValueInputSerializer,
-    BulkProductAttributeValueSerializer,
-    ProductWithAttributesSerializer
-)
-
-
 class ProductAttributeValueView(APIView):
     """
     API to manage manually entered original attribute values.