@@ -132,11 +132,14 @@ class ExtractProductAttributesView(APIView):
        return Response(result, status=status.HTTP_200_OK)
+
+# Replace the BatchExtractProductAttributesView in your views.py with this updated version
+
# class BatchExtractProductAttributesView(APIView):
# """
# API endpoint to extract product attributes for multiple products in batch.
# Uses item-specific mandatory_attrs with source tracking.
-# Returns attributes in array format: [{"value": "...", "source": "..."}]
+# Returns attributes in array format with original_value field.
# Includes OCR and Visual Processing results.
# """
@@ -147,15 +150,6 @@ class ExtractProductAttributesView(APIView):
# validated_data = serializer.validated_data
-# # DEBUG: Print what we received
-# print("\n" + "="*80)
-# print("BATCH REQUEST - RECEIVED DATA")
-# print("="*80)
-# print(f"Raw request data keys: {request.data.keys()}")
-# print(f"Multiple field in request: {request.data.get('multiple')}")
-# print(f"Validated multiple field: {validated_data.get('multiple')}")
-# print("="*80 + "\n")
-
# # Get batch-level settings
# product_list = validated_data.get("products", [])
# model = validated_data.get("model")
@@ -168,27 +162,33 @@ class ExtractProductAttributesView(APIView):
# use_adaptive_margin = validated_data.get("use_adaptive_margin", True)
# use_semantic_clustering = validated_data.get("use_semantic_clustering", True)
-# # DEBUG: Print extracted settings
-# print(f"Extracted multiple parameter: {multiple}")
-# print(f"Type: {type(multiple)}")
-
# # Extract all item_ids to query the database efficiently
# item_ids = [p['item_id'] for p in product_list]
# # Fetch all products in one query
# products_queryset = Product.objects.filter(item_id__in=item_ids)
-
-# # Create a dictionary for easy lookup: item_id -> Product object
# product_map = {product.item_id: product for product in products_queryset}
# found_ids = set(product_map.keys())
+# # Fetch all original attribute values for these products in one query
+# original_values_qs = ProductAttributeValue.objects.filter(
+# product__item_id__in=item_ids
+# ).select_related('product')
+
+# # Create a nested dictionary: {item_id: {attribute_name: original_value}}
+# original_values_map = {}
+# for attr_val in original_values_qs:
+# item_id = attr_val.product.item_id
+# if item_id not in original_values_map:
+# original_values_map[item_id] = {}
+# original_values_map[item_id][attr_val.attribute_name] = attr_val.original_value
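+# # Illustrative shape of the resulting lookup (item ids and attribute values invented for the example):
+# # original_values_map == {"ITEM001": {"Color": "Red", "Size": "M"}, "ITEM002": {"Material": "Cotton"}}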
+
# results = []
# successful = 0
# failed = 0
# for product_entry in product_list:
# item_id = product_entry['item_id']
-# # Get item-specific mandatory attributes
# mandatory_attrs = product_entry['mandatory_attrs']
# if item_id not in found_ids:
@@ -206,7 +206,7 @@ class ExtractProductAttributesView(APIView):
# short_desc = product.product_short_description
# long_desc = product.product_long_description
# image_url = product.image_path
-# # image_url = "https://images.unsplash.com/photo-1595777457583-95e059d581b8"
+
# ocr_results = None
# ocr_text = None
# visual_results = None
@@ -216,7 +216,6 @@ class ExtractProductAttributesView(APIView):
# # OCR Processing
# ocr_service = OCRService()
# ocr_results = ocr_service.process_image(image_url)
-# print(f"OCR results for {item_id}: {ocr_results}")
# if ocr_results and ocr_results.get("detected_text"):
# ocr_attrs = ProductAttributeService.extract_attributes_from_ocr(
@@ -232,7 +231,6 @@ class ExtractProductAttributesView(APIView):
# visual_service = VisualProcessingService()
# product_type_hint = product.product_type if hasattr(product, 'product_type') else None
# visual_results = visual_service.process_image(image_url, product_type_hint)
-# print(f"Visual results for {item_id}: {visual_results.get('visual_attributes', {})}")
# # Format visual attributes to array format with source tracking
# if visual_results and visual_results.get('visual_attributes'):
@@ -248,10 +246,6 @@ class ExtractProductAttributesView(APIView):
# ocr_text=ocr_text
# )
-# # DEBUG: Print before extraction
-# print(f"\n>>> Extracting for product {item_id}")
-# print(f" Passing multiple: {multiple}")
-
# # Attribute Extraction with source tracking (returns array format)
# extracted = ProductAttributeService.extract_attributes(
# product_text=product_text,
@@ -267,6 +261,25 @@ class ExtractProductAttributesView(APIView):
# use_semantic_clustering=use_semantic_clustering
# )
+# # Add original_value to each extracted attribute
+# original_attrs = original_values_map.get(item_id, {})
+
+# # Process mandatory attributes
+# for attr_name, attr_values in extracted.get("mandatory", {}).items():
+# if isinstance(attr_values, list):
+# for attr_obj in attr_values:
+# if isinstance(attr_obj, dict):
+# # Add original_value if it exists
+# attr_obj["original_value"] = original_attrs.get(attr_name, "")
+
+# # Process additional attributes
+# for attr_name, attr_values in extracted.get("additional", {}).items():
+# if isinstance(attr_values, list):
+# for attr_obj in attr_values:
+# if isinstance(attr_obj, dict):
+# # Add original_value if it exists
+# attr_obj["original_value"] = original_attrs.get(attr_name, "")
+
# result = {
# "product_id": product.item_id,
# "mandatory": extracted.get("mandatory", {}),
@@ -305,51 +318,244 @@ class ExtractProductAttributesView(APIView):
# return Response(batch_result, status=status.HTTP_200_OK)
+# views.py - OPTIMIZED WITHOUT REDIS/CELERY
+# class BatchExtractProductAttributesView(APIView):
+# """
+# Optimized batch extraction using ThreadPoolExecutor (built-in Python)
+# """
-# Replace the BatchExtractProductAttributesView in your views.py with this updated version
+# def post(self, request):
+# serializer = BatchProductRequestSerializer(data=request.data)
+# if not serializer.is_valid():
+# return Response({"error": serializer.errors}, status=status.HTTP_400_BAD_REQUEST)
+
+# validated_data = serializer.validated_data
+# product_list = validated_data.get("products", [])
+
+# # OPTIMIZATION 1: Single optimized database query
+# item_ids = [p['item_id'] for p in product_list]
+# products_queryset = Product.objects.filter(
+# item_id__in=item_ids
+# ).prefetch_related('attribute_values') # Single query!
+
+# product_map = {product.item_id: product for product in products_queryset}
+
+# # OPTIMIZATION 2: Prefetch ALL original attribute values in ONE query
+# original_values_qs = ProductAttributeValue.objects.filter(
+# product__item_id__in=item_ids
+# ).select_related('product')
+
+# original_values_map = {}
+# for attr_val in original_values_qs:
+# item_id = attr_val.product.item_id
+# if item_id not in original_values_map:
+# original_values_map[item_id] = {}
+# original_values_map[item_id][attr_val.attribute_name] = attr_val.original_value
+
+# # Extract settings
+# model = validated_data.get("model")
+# extract_additional = validated_data.get("extract_additional", True)
+# process_image = validated_data.get("process_image", True)
+# multiple = validated_data.get("multiple", [])
+# threshold_abs = validated_data.get("threshold_abs", 0.65)
+# margin = validated_data.get("margin", 0.15)
+# use_dynamic_thresholds = validated_data.get("use_dynamic_thresholds", True)
+# use_adaptive_margin = validated_data.get("use_adaptive_margin", True)
+# use_semantic_clustering = validated_data.get("use_semantic_clustering", True)
+
+# results = []
+# successful = 0
+# failed = 0
+
+# # OPTIMIZATION 3: Initialize services once
+# ocr_service = OCRService() if process_image else None
+# visual_service = VisualProcessingService() if process_image else None
+
+# # OPTIMIZATION 4: Process in parallel using ThreadPoolExecutor
+# def process_single_product(product_entry):
+# """Process a single product (runs in parallel)"""
+# item_id = product_entry['item_id']
+# mandatory_attrs = product_entry['mandatory_attrs']
+
+# if item_id not in product_map:
+# return {
+# "product_id": item_id,
+# "error": "Product not found in database"
+# }, False
+
+# product = product_map[item_id]
+
+# try:
+# title = product.product_name
+# short_desc = product.product_short_description
+# long_desc = product.product_long_description
+# image_url = product.image_path
+
+# ocr_results = None
+# ocr_text = None
+# visual_results = None
+
+# # Image processing (if enabled)
+# if process_image and image_url:
+# if ocr_service:
+# ocr_results = ocr_service.process_image(image_url)
+
+# if ocr_results and ocr_results.get("detected_text"):
+# ocr_attrs = ProductAttributeService.extract_attributes_from_ocr(
+# ocr_results, model
+# )
+# ocr_results["extracted_attributes"] = ocr_attrs
+# ocr_text = "\n".join([
+# f"{item['text']} (confidence: {item['confidence']:.2f})"
+# for item in ocr_results["detected_text"]
+# ])
+
+# if visual_service:
+# product_type_hint = product.product_type if hasattr(product, 'product_type') else None
+# visual_results = visual_service.process_image(image_url, product_type_hint)
+
+# if visual_results and visual_results.get('visual_attributes'):
+# visual_results['visual_attributes'] = ProductAttributeService.format_visual_attributes(
+# visual_results['visual_attributes']
+# )
+
+# # Combine product text with source tracking
+# product_text, source_map = ProductAttributeService.combine_product_text(
+# title=title,
+# short_desc=short_desc,
+# long_desc=long_desc,
+# ocr_text=ocr_text
+# )
+
+# # Extract attributes (WITH CACHING ENABLED)
+# extracted = ProductAttributeService.extract_attributes(
+# product_text=product_text,
+# mandatory_attrs=mandatory_attrs,
+# source_map=source_map,
+# model=model,
+# extract_additional=extract_additional,
+# multiple=multiple,
+# threshold_abs=threshold_abs,
+# margin=margin,
+# use_dynamic_thresholds=use_dynamic_thresholds,
+# use_adaptive_margin=use_adaptive_margin,
+# use_semantic_clustering=use_semantic_clustering,
+# use_cache=True # Enable caching!
+# )
+
+# # Add original values
+# original_attrs = original_values_map.get(item_id, {})
+
+# for attr_name, attr_values in extracted.get("mandatory", {}).items():
+# if isinstance(attr_values, list):
+# for attr_obj in attr_values:
+# if isinstance(attr_obj, dict):
+# attr_obj["original_value"] = original_attrs.get(attr_name, "")
+
+# for attr_name, attr_values in extracted.get("additional", {}).items():
+# if isinstance(attr_values, list):
+# for attr_obj in attr_values:
+# if isinstance(attr_obj, dict):
+# attr_obj["original_value"] = original_attrs.get(attr_name, "")
+
+# result = {
+# "product_id": product.item_id,
+# "mandatory": extracted.get("mandatory", {}),
+# "additional": extracted.get("additional", {}),
+# }
+
+# if ocr_results:
+# result["ocr_results"] = ocr_results
+
+# if visual_results:
+# result["visual_results"] = visual_results
+
+# return result, True
+
+# except Exception as e:
+# return {
+# "product_id": item_id,
+# "error": str(e)
+# }, False
+
+# # OPTIMIZATION 5: Use ThreadPoolExecutor for parallel processing
+# import concurrent.futures
+# max_workers = min(10, len(product_list)) # Up to 10 parallel workers
+
+# with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
+# # Submit all tasks
+# future_to_product = {
+# executor.submit(process_single_product, product): product
+# for product in product_list
+# }
+
+# # Collect results as they complete
+# for future in concurrent.futures.as_completed(future_to_product):
+# try:
+# result, success = future.result()
+# results.append(result)
+# if success:
+# successful += 1
+# else:
+# failed += 1
+# except Exception as e:
+# failed += 1
+# logger.error(f"Unexpected error: {str(e)}")
+# results.append({
+# "product_id": "unknown",
+# "error": str(e)
+# })
+
+# batch_result = {
+# "results": results,
+# "total_products": len(product_list),
+# "successful": successful,
+# "failed": failed
+# }
+
+# response_serializer = BatchProductResponseSerializer(data=batch_result)
+# if response_serializer.is_valid():
+# return Response(response_serializer.data, status=status.HTTP_200_OK)
+
+# return Response(batch_result, status=status.HTTP_200_OK)
+
+
+# ==================== OPTIMIZED BATCH VIEW ====================
+import concurrent.futures
class BatchExtractProductAttributesView(APIView):
    """
-    API endpoint to extract product attributes for multiple products in batch.
-    Uses item-specific mandatory_attrs with source tracking.
-    Returns attributes in array format with original_value field.
-    Includes OCR and Visual Processing results.
+    ⚡ PERFORMANCE OPTIMIZED: Batch extraction with intelligent parallelization
+    Expected performance: 10 products in 30-60 seconds (with image processing)
    """

    def post(self, request):
+        import time
+        start_time = time.time()
+
        serializer = BatchProductRequestSerializer(data=request.data)
        if not serializer.is_valid():
            return Response({"error": serializer.errors}, status=status.HTTP_400_BAD_REQUEST)

        validated_data = serializer.validated_data
-
-        # Get batch-level settings
        product_list = validated_data.get("products", [])
-        model = validated_data.get("model")
-        extract_additional = validated_data.get("extract_additional", True)
-        process_image = validated_data.get("process_image", True)
-        multiple = validated_data.get("multiple", [])
-        threshold_abs = validated_data.get("threshold_abs", 0.65)
-        margin = validated_data.get("margin", 0.15)
-        use_dynamic_thresholds = validated_data.get("use_dynamic_thresholds", True)
-        use_adaptive_margin = validated_data.get("use_adaptive_margin", True)
-        use_semantic_clustering = validated_data.get("use_semantic_clustering", True)
-        # Extract all item_ids to query the database efficiently
-        item_ids = [p['item_id'] for p in product_list]
+        logger.info(f"🚀 Starting batch processing for {len(product_list)} products")
+
+        # ==================== OPTIMIZATION 1: Bulk DB Query ====================
+        item_ids = [p['item_id'] for p in product_list]
+        products_queryset = Product.objects.filter(
+            item_id__in=item_ids
+        ).prefetch_related('attribute_values')
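+        # NOTE: prefetch_related('attribute_values') assumes the ProductAttributeValue foreign key
+        # to Product is declared with related_name='attribute_values'; adjust if the model differs.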
-        # Fetch all products in one query
-        products_queryset = Product.objects.filter(item_id__in=item_ids)
        product_map = {product.item_id: product for product in products_queryset}
-        found_ids = set(product_map.keys())

-        # Fetch all original attribute values for these products in one query
+        # Prefetch ALL original attribute values in ONE query
        original_values_qs = ProductAttributeValue.objects.filter(
            product__item_id__in=item_ids
        ).select_related('product')

-        # Create a nested dictionary: {item_id: {attribute_name: original_value}}
        original_values_map = {}
        for attr_val in original_values_qs:
            item_id = attr_val.product.item_id
@@ -357,25 +563,53 @@ class BatchExtractProductAttributesView(APIView):
                original_values_map[item_id] = {}
            original_values_map[item_id][attr_val.attribute_name] = attr_val.original_value

+        logger.info(f"✓ Loaded {len(product_map)} products from database")
+
+        # Extract settings
+        model = validated_data.get("model")
+        extract_additional = validated_data.get("extract_additional", True)
+        process_image = validated_data.get("process_image", True)
+        multiple = validated_data.get("multiple", [])
+        threshold_abs = validated_data.get("threshold_abs", 0.65)
+        margin = validated_data.get("margin", 0.15)
+        use_dynamic_thresholds = validated_data.get("use_dynamic_thresholds", True)
+        use_adaptive_margin = validated_data.get("use_adaptive_margin", True)
+        use_semantic_clustering = validated_data.get("use_semantic_clustering", True)
+
        results = []
        successful = 0
        failed = 0
+
+        # ==================== OPTIMIZATION 2: Conditional Service Init ====================
+        # Only initialize if processing images
+        ocr_service = None
+        visual_service = None
+
+        if process_image:
+            from .ocr_service import OCRService
+            from .visual_processing_service import VisualProcessingService
+            ocr_service = OCRService()
+            visual_service = VisualProcessingService()
+            logger.info("✓ Image processing services initialized")
-        for product_entry in product_list:
+        # ==================== OPTIMIZATION 3: Smart Parallelization ====================
+        def process_single_product(product_entry):
+            """Process a single product (runs in parallel)"""
+            import time
+            product_start = time.time()
+
            item_id = product_entry['item_id']
-            mandatory_attrs = product_entry['mandatory_attrs']
+            mandatory_attrs = product_entry['mandatory_attrs']

-            if item_id not in found_ids:
-                failed += 1
-                results.append({
+            if item_id not in product_map:
+                return {
                    "product_id": item_id,
                    "error": "Product not found in database"
-                })
-                continue
+                }, False

            product = product_map[item_id]

-            try:
+            try:
                title = product.product_name
                short_desc = product.product_short_description
                long_desc = product.product_long_description
@@ -385,32 +619,29 @@ class BatchExtractProductAttributesView(APIView):
                ocr_text = None
                visual_results = None

-                # Image Processing Logic
+                # ⚡ SKIP IMAGE PROCESSING IF DISABLED (HUGE TIME SAVER)
                if process_image and image_url:
-                    # OCR Processing
-                    ocr_service = OCRService()
-                    ocr_results = ocr_service.process_image(image_url)
-
-                    if ocr_results and ocr_results.get("detected_text"):
-                        ocr_attrs = ProductAttributeService.extract_attributes_from_ocr(
-                            ocr_results, model
-                        )
-                        ocr_results["extracted_attributes"] = ocr_attrs
-                        ocr_text = "\n".join([
-                            f"{item['text']} (confidence: {item['confidence']:.2f})"
-                            for item in ocr_results["detected_text"]
-                        ])
+                    if ocr_service:
+                        ocr_results = ocr_service.process_image(image_url)
+
+                        if ocr_results and ocr_results.get("detected_text"):
+                            ocr_attrs = ProductAttributeService.extract_attributes_from_ocr(
+                                ocr_results, model
+                            )
+                            ocr_results["extracted_attributes"] = ocr_attrs
+                            ocr_text = "\n".join([
+                                f"{item['text']} (confidence: {item['confidence']:.2f})"
+                                for item in ocr_results["detected_text"]
+                            ])

-                    # Visual Processing
-                    visual_service = VisualProcessingService()
-                    product_type_hint = product.product_type if hasattr(product, 'product_type') else None
-                    visual_results = visual_service.process_image(image_url, product_type_hint)
-
-                    # Format visual attributes to array format with source tracking
-                    if visual_results and visual_results.get('visual_attributes'):
-                        visual_results['visual_attributes'] = ProductAttributeService.format_visual_attributes(
-                            visual_results['visual_attributes']
-                        )
+                    if visual_service:
+                        product_type_hint = product.product_type if hasattr(product, 'product_type') else None
+                        visual_results = visual_service.process_image(image_url, product_type_hint)
+
+                        if visual_results and visual_results.get('visual_attributes'):
+                            visual_results['visual_attributes'] = ProductAttributeService.format_visual_attributes(
+                                visual_results['visual_attributes']
+                            )

                # Combine product text with source tracking
                product_text, source_map = ProductAttributeService.combine_product_text(
@@ -420,7 +651,7 @@ class BatchExtractProductAttributesView(APIView):
                    ocr_text=ocr_text
                )

-                # Attribute Extraction with source tracking (returns array format)
+                # ⚡ EXTRACT ATTRIBUTES WITH CACHING ENABLED
                extracted = ProductAttributeService.extract_attributes(
                    product_text=product_text,
                    mandatory_attrs=mandatory_attrs,
@@ -432,26 +663,23 @@ class BatchExtractProductAttributesView(APIView):
                    margin=margin,
                    use_dynamic_thresholds=use_dynamic_thresholds,
                    use_adaptive_margin=use_adaptive_margin,
-                    use_semantic_clustering=use_semantic_clustering
+                    use_semantic_clustering=use_semantic_clustering,
+                    use_cache=True  # ⚡ CRITICAL: Enable caching
                )

-                # Add original_value to each extracted attribute
+                # Add original values
                original_attrs = original_values_map.get(item_id, {})

-                # Process mandatory attributes
                for attr_name, attr_values in extracted.get("mandatory", {}).items():
                    if isinstance(attr_values, list):
                        for attr_obj in attr_values:
                            if isinstance(attr_obj, dict):
-                                # Add original_value if it exists
                                attr_obj["original_value"] = original_attrs.get(attr_name, "")

-                # Process additional attributes
                for attr_name, attr_values in extracted.get("additional", {}).items():
                    if isinstance(attr_values, list):
                        for attr_obj in attr_values:
                            if isinstance(attr_obj, dict):
-                                # Add original_value if it exists
                                attr_obj["original_value"] = original_attrs.get(attr_name, "")

                result = {
@@ -460,29 +688,82 @@ class BatchExtractProductAttributesView(APIView):
                    "additional": extracted.get("additional", {}),
                }

-                # Attach OCR results if available
                if ocr_results:
                    result["ocr_results"] = ocr_results

-                # Attach Visual Processing results if available
                if visual_results:
                    result["visual_results"] = visual_results
+
+                processing_time = time.time() - product_start
+                logger.info(f"✓ Processed {item_id} in {processing_time:.2f}s")

-                results.append(result)
-                successful += 1
+                return result, True

            except Exception as e:
-                failed += 1
-                results.append({
+                logger.error(f"❌ Error processing {item_id}: {str(e)}")
+                return {
                    "product_id": item_id,
                    "error": str(e)
-                })
+                }, False
+
+        # ==================== OPTIMIZATION 4: Parallel Execution ====================
+        # Adjust workers based on whether image processing is enabled
+        max_workers = min(3 if process_image else 10, len(product_list))
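+        # Fewer workers when image processing is on: each task then makes heavier OCR and visual
+        # calls per product, which is my reading of the intent behind the 3-vs-10 split above.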
+
+        logger.info(f"⚡ Using {max_workers} parallel workers")
+
+        with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
+            # Submit all tasks
+            future_to_product = {
+                executor.submit(process_single_product, product): product
+                for product in product_list
+            }
+
+            # Collect results as they complete
+            for future in concurrent.futures.as_completed(future_to_product):
+                try:
+                    result, success = future.result()
+                    results.append(result)
+                    if success:
+                        successful += 1
+                    else:
+                        failed += 1
+                except Exception as e:
+                    failed += 1
+                    logger.error(f"❌ Future execution error: {str(e)}")
+                    results.append({
+                        "product_id": "unknown",
+                        "error": str(e)
+                    })
+
+        total_time = time.time() - start_time
+
+        # Get cache statistics
+        cache_stats = ProductAttributeService.get_cache_stats()
+
+        logger.info(f"""
+🎉 BATCH PROCESSING COMPLETE
+━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
+   Total products: {len(product_list)}
+   Successful: {successful}
+   Failed: {failed}
+   Total time: {total_time:.2f}s
+   Avg time/product: {total_time/len(product_list):.2f}s
+   Cache hit rate: {cache_stats['embedding_cache']['hit_rate_percent']:.1f}%
+━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
+        """)

        batch_result = {
            "results": results,
            "total_products": len(product_list),
            "successful": successful,
-            "failed": failed
+            "failed": failed,
+            "performance": {
+                "total_time_seconds": round(total_time, 2),
+                "avg_time_per_product": round(total_time / len(product_list), 2),
+                "workers_used": max_workers
+            },
+            "cache_stats": cache_stats
        }

        response_serializer = BatchProductResponseSerializer(data=batch_result)
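# For reference, an illustrative request body for this endpoint, assembled from the
# validated_data.get(...) fields read above. The item ids, attribute names and values are
# invented examples, and the serializer may accept further fields not shown here:
#
# {
#     "products": [
#         {"item_id": "ITEM001", "mandatory_attrs": {"Color": ["Red", "Blue"], "Size": ["S", "M", "L"]}}
#     ],
#     "model": "llama-3.1-8b-instant",
#     "extract_additional": true,
#     "process_image": false
# }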
@@ -493,7 +774,6 @@ class BatchExtractProductAttributesView(APIView):
-
class ProductListView(APIView):
    """
    GET API to list all products with details
@@ -512,77 +792,6 @@ import pandas as pd
|
|
|
from .models import Product
|
|
from .models import Product
|
|
|
|
|
|
|
|
|
|
|
|
|
-# class ProductUploadExcelView(APIView):
|
|
|
|
|
-# """
|
|
|
|
|
-# POST API to upload an Excel file and add/update data in Product model.
|
|
|
|
|
-# - Creates new records if they don't exist.
|
|
|
|
|
-# - Updates existing ones (e.g., when image_path or other fields change).
|
|
|
|
|
-# """
|
|
|
|
|
-# parser_classes = (MultiPartParser, FormParser)
|
|
|
|
|
-
|
|
|
|
|
-# def post(self, request, *args, **kwargs):
|
|
|
|
|
-# file_obj = request.FILES.get('file')
|
|
|
|
|
-# if not file_obj:
|
|
|
|
|
-# return Response({'error': 'No file provided'}, status=status.HTTP_400_BAD_REQUEST)
|
|
|
|
|
-
|
|
|
|
|
-# try:
|
|
|
|
|
-# # Read Excel into DataFrame
|
|
|
|
|
-# df = pd.read_excel(file_obj)
|
|
|
|
|
-# df.columns = [c.strip().lower().replace(' ', '_') for c in df.columns]
|
|
|
|
|
-
|
|
|
|
|
-# expected_cols = {
|
|
|
|
|
-# 'item_id',
|
|
|
|
|
-# 'product_name',
|
|
|
|
|
-# 'product_long_description',
|
|
|
|
|
-# 'product_short_description',
|
|
|
|
|
-# 'product_type',
|
|
|
|
|
-# 'image_path'
|
|
|
|
|
-# }
|
|
|
|
|
-
|
|
|
|
|
-# # Check required columns
|
|
|
|
|
-# if not expected_cols.issubset(df.columns):
|
|
|
|
|
-# return Response({
|
|
|
|
|
-# 'error': 'Missing required columns',
|
|
|
|
|
-# 'required_columns': list(expected_cols)
|
|
|
|
|
-# }, status=status.HTTP_400_BAD_REQUEST)
|
|
|
|
|
-
|
|
|
|
|
-# created_count = 0
|
|
|
|
|
-# updated_count = 0
|
|
|
|
|
-
|
|
|
|
|
-# # Loop through rows and update or create
|
|
|
|
|
-# for _, row in df.iterrows():
|
|
|
|
|
-# item_id = str(row.get('item_id', '')).strip()
|
|
|
|
|
-# if not item_id:
|
|
|
|
|
-# continue # Skip rows without an item_id
|
|
|
|
|
-
|
|
|
|
|
-# defaults = {
|
|
|
|
|
-# 'product_name': row.get('product_name', ''),
|
|
|
|
|
-# 'product_long_description': row.get('product_long_description', ''),
|
|
|
|
|
-# 'product_short_description': row.get('product_short_description', ''),
|
|
|
|
|
-# 'product_type': row.get('product_type', ''),
|
|
|
|
|
-# 'image_path': row.get('image_path', ''),
|
|
|
|
|
-# }
|
|
|
|
|
-
|
|
|
|
|
-# obj, created = Product.objects.update_or_create(
|
|
|
|
|
-# item_id=item_id,
|
|
|
|
|
-# defaults=defaults
|
|
|
|
|
-# )
|
|
|
|
|
-
|
|
|
|
|
-# if created:
|
|
|
|
|
-# created_count += 1
|
|
|
|
|
-# else:
|
|
|
|
|
-# updated_count += 1
|
|
|
|
|
-
|
|
|
|
|
-# return Response({
|
|
|
|
|
-# 'message': f'Upload successful.',
|
|
|
|
|
-# 'created': f'{created_count} new records added.',
|
|
|
|
|
-# 'updated': f'{updated_count} existing records updated.'
|
|
|
|
|
-# }, status=status.HTTP_201_CREATED)
|
|
|
|
|
-
|
|
|
|
|
-# except Exception as e:
|
|
|
|
|
-# return Response({'error': str(e)}, status=status.HTTP_500_INTERNAL_SERVER_ERROR)
|
|
|
|
|
-# Replace the ProductUploadExcelView in your views.py with this updated version
|
|
|
|
|
-
|
|
|
|
|
from rest_framework.views import APIView
|
|
from rest_framework.views import APIView
|
|
|
from rest_framework.response import Response
|
|
from rest_framework.response import Response
|
|
|
from rest_framework import status
|
|
from rest_framework import status
|
|
@@ -592,835 +801,12 @@ import pandas as pd
|
|
|
from .models import Product, ProductAttributeValue
|
|
from .models import Product, ProductAttributeValue
|
|
|
|
|
|
|
|
|
|
|
|
|
-# class ProductUploadExcelView(APIView):
|
|
|
|
|
-# """
|
|
|
|
|
-# POST API to upload an Excel file with two sheets:
|
|
|
|
|
-# 1. 'Products' sheet - Product details
|
|
|
|
|
-# 2. 'Attribute_values' sheet - Original attribute values
|
|
|
|
|
-
|
|
|
|
|
-# Creates/updates both products and their attribute values in a single transaction.
|
|
|
|
|
-# """
|
|
|
|
|
-# parser_classes = (MultiPartParser, FormParser)
|
|
|
|
|
|
|
|
|
|
-# def post(self, request, *args, **kwargs):
|
|
|
|
|
-# file_obj = request.FILES.get('file')
|
|
|
|
|
-# if not file_obj:
|
|
|
|
|
-# return Response({'error': 'No file provided'}, status=status.HTTP_400_BAD_REQUEST)
|
|
|
|
|
|
|
|
|
|
-# try:
|
|
|
|
|
-# # Read all sheets from Excel file
|
|
|
|
|
-# excel_file = pd.ExcelFile(file_obj)
|
|
|
|
|
-
|
|
|
|
|
-# # Check if required sheets exist
|
|
|
|
|
-# if 'Products' not in excel_file.sheet_names:
|
|
|
|
|
-# return Response({
|
|
|
|
|
-# 'error': "Missing 'Products' sheet",
|
|
|
|
|
-# 'available_sheets': excel_file.sheet_names
|
|
|
|
|
-# }, status=status.HTTP_400_BAD_REQUEST)
|
|
|
|
|
-
|
|
|
|
|
-# # Read Products sheet
|
|
|
|
|
-# df_products = pd.read_excel(excel_file, sheet_name='Products')
|
|
|
|
|
-# df_products.columns = [c.strip().lower().replace(' ', '_') for c in df_products.columns]
|
|
|
|
|
-
|
|
|
|
|
-# # Check required columns for Products
|
|
|
|
|
-# expected_product_cols = {
|
|
|
|
|
-# 'item_id',
|
|
|
|
|
-# 'product_name',
|
|
|
|
|
-# 'product_long_description',
|
|
|
|
|
-# 'product_short_description',
|
|
|
|
|
-# 'product_type',
|
|
|
|
|
-# 'image_path'
|
|
|
|
|
-# }
|
|
|
|
|
|
|
|
|
|
-# if not expected_product_cols.issubset(df_products.columns):
|
|
|
|
|
-# return Response({
|
|
|
|
|
-# 'error': 'Missing required columns in Products sheet',
|
|
|
|
|
-# 'required_columns': list(expected_product_cols),
|
|
|
|
|
-# 'found_columns': list(df_products.columns)
|
|
|
|
|
-# }, status=status.HTTP_400_BAD_REQUEST)
|
|
|
|
|
|
|
|
|
|
-# # Read Attribute_values sheet if it exists
|
|
|
|
|
-# df_attributes = None
|
|
|
|
|
-# has_attributes_sheet = 'Attribute_values' in excel_file.sheet_names
|
|
|
|
|
-
|
|
|
|
|
-# if has_attributes_sheet:
|
|
|
|
|
-# df_attributes = pd.read_excel(excel_file, sheet_name='Attribute_values')
|
|
|
|
|
-# df_attributes.columns = [c.strip().lower().replace(' ', '_') for c in df_attributes.columns]
|
|
|
|
|
-
|
|
|
|
|
-# # Check required columns for Attribute_values
|
|
|
|
|
-# expected_attr_cols = {'item_id', 'attribute_name', 'original_value'}
|
|
|
|
|
-# if not expected_attr_cols.issubset(df_attributes.columns):
|
|
|
|
|
-# return Response({
|
|
|
|
|
-# 'error': 'Missing required columns in Attribute_values sheet',
|
|
|
|
|
-# 'required_columns': list(expected_attr_cols),
|
|
|
|
|
-# 'found_columns': list(df_attributes.columns)
|
|
|
|
|
-# }, status=status.HTTP_400_BAD_REQUEST)
|
|
|
|
|
-
|
|
|
|
|
-# # Initialize counters
|
|
|
|
|
-# products_created = 0
|
|
|
|
|
-# products_updated = 0
|
|
|
|
|
-# attributes_created = 0
|
|
|
|
|
-# attributes_updated = 0
|
|
|
|
|
-# products_failed = 0
|
|
|
|
|
-# attributes_failed = 0
|
|
|
|
|
-# errors = []
|
|
|
|
|
-
|
|
|
|
|
-# # Use transaction to ensure atomicity
|
|
|
|
|
-# with transaction.atomic():
|
|
|
|
|
-# # Process Products sheet
|
|
|
|
|
-# for idx, row in df_products.iterrows():
|
|
|
|
|
-# item_id = str(row.get('item_id', '')).strip()
|
|
|
|
|
-# if not item_id:
|
|
|
|
|
-# products_failed += 1
|
|
|
|
|
-# errors.append(f"Products Row {idx + 2}: Missing item_id")
|
|
|
|
|
-# continue
|
|
|
|
|
-
|
|
|
|
|
-# try:
|
|
|
|
|
-# defaults = {
|
|
|
|
|
-# 'product_name': str(row.get('product_name', '')),
|
|
|
|
|
-# 'product_long_description': str(row.get('product_long_description', '')),
|
|
|
|
|
-# 'product_short_description': str(row.get('product_short_description', '')),
|
|
|
|
|
-# 'product_type': str(row.get('product_type', '')),
|
|
|
|
|
-# 'image_path': str(row.get('image_path', '')),
|
|
|
|
|
-# }
|
|
|
|
|
-
|
|
|
|
|
-# obj, created = Product.objects.update_or_create(
|
|
|
|
|
-# item_id=item_id,
|
|
|
|
|
-# defaults=defaults
|
|
|
|
|
-# )
|
|
|
|
|
|
|
|
|
|
-# if created:
|
|
|
|
|
-# products_created += 1
|
|
|
|
|
-# else:
|
|
|
|
|
-# products_updated += 1
|
|
|
|
|
-# except Exception as e:
|
|
|
|
|
-# products_failed += 1
|
|
|
|
|
-# errors.append(f"Products Row {idx + 2} (item_id: {item_id}): {str(e)}")
|
|
|
|
|
-
|
|
|
|
|
-# # Process Attribute_values sheet if it exists
|
|
|
|
|
-# if has_attributes_sheet and df_attributes is not None:
|
|
|
|
|
-# # Group by item_id to optimize lookups
|
|
|
|
|
-# item_ids_in_attrs = df_attributes['item_id'].unique()
|
|
|
|
|
-
|
|
|
|
|
-# # Fetch all products at once
|
|
|
|
|
-# existing_products = {
|
|
|
|
|
-# p.item_id: p
|
|
|
|
|
-# for p in Product.objects.filter(item_id__in=item_ids_in_attrs)
|
|
|
|
|
-# }
|
|
|
|
|
-
|
|
|
|
|
-# for idx, row in df_attributes.iterrows():
|
|
|
|
|
-# item_id = str(row.get('item_id', '')).strip()
|
|
|
|
|
-# attribute_name = str(row.get('attribute_name', '')).strip()
|
|
|
|
|
-# original_value = str(row.get('original_value', '')).strip()
|
|
|
|
|
-
|
|
|
|
|
-# if not item_id or not attribute_name:
|
|
|
|
|
-# attributes_failed += 1
|
|
|
|
|
-# errors.append(
|
|
|
|
|
-# f"Attribute_values Row {idx + 2}: Missing item_id or attribute_name"
|
|
|
|
|
-# )
|
|
|
|
|
-# continue
|
|
|
|
|
-
|
|
|
|
|
-# # Check if product exists
|
|
|
|
|
-# product = existing_products.get(item_id)
|
|
|
|
|
-# if not product:
|
|
|
|
|
-# attributes_failed += 1
|
|
|
|
|
-# errors.append(
|
|
|
|
|
-# f"Attribute_values Row {idx + 2}: Product with item_id '{item_id}' not found. "
|
|
|
|
|
-# "Make sure it exists in Products sheet."
|
|
|
|
|
-# )
|
|
|
|
|
-# continue
|
|
|
|
|
-
|
|
|
|
|
-# try:
|
|
|
|
|
-# attr_obj, created = ProductAttributeValue.objects.update_or_create(
|
|
|
|
|
-# product=product,
|
|
|
|
|
-# attribute_name=attribute_name,
|
|
|
|
|
-# defaults={'original_value': original_value}
|
|
|
|
|
-# )
|
|
|
|
|
-
|
|
|
|
|
-# if created:
|
|
|
|
|
-# attributes_created += 1
|
|
|
|
|
-# else:
|
|
|
|
|
-# attributes_updated += 1
|
|
|
|
|
-# except Exception as e:
|
|
|
|
|
-# attributes_failed += 1
|
|
|
|
|
-# errors.append(
|
|
|
|
|
-# f"Attribute_values Row {idx + 2} "
|
|
|
|
|
-# f"(item_id: {item_id}, attribute: {attribute_name}): {str(e)}"
|
|
|
|
|
-# )
|
|
|
|
|
-
|
|
|
|
|
-# # Prepare response
|
|
|
|
|
-# response_data = {
|
|
|
|
|
-# 'message': 'Upload completed successfully',
|
|
|
|
|
-# 'products': {
|
|
|
|
|
-# 'created': products_created,
|
|
|
|
|
-# 'updated': products_updated,
|
|
|
|
|
-# 'failed': products_failed,
|
|
|
|
|
-# 'total_processed': products_created + products_updated + products_failed
|
|
|
|
|
-# }
|
|
|
|
|
-# }
|
|
|
|
|
-
|
|
|
|
|
-# if has_attributes_sheet:
|
|
|
|
|
-# response_data['attribute_values'] = {
|
|
|
|
|
-# 'created': attributes_created,
|
|
|
|
|
-# 'updated': attributes_updated,
|
|
|
|
|
-# 'failed': attributes_failed,
|
|
|
|
|
-# 'total_processed': attributes_created + attributes_updated + attributes_failed
|
|
|
|
|
-# }
|
|
|
|
|
-# else:
|
|
|
|
|
-# response_data['attribute_values'] = {
|
|
|
|
|
-# 'message': 'Attribute_values sheet not found in Excel file'
|
|
|
|
|
-# }
|
|
|
|
|
|
|
|
|
|
-# if errors:
|
|
|
|
|
-# response_data['errors'] = errors[:50] # Limit to first 50 errors
|
|
|
|
|
-# if len(errors) > 50:
|
|
|
|
|
-# response_data['errors'].append(f"... and {len(errors) - 50} more errors")
|
|
|
|
|
-
|
|
|
|
|
-# # Determine status code
|
|
|
|
|
-# if products_failed > 0 or attributes_failed > 0:
|
|
|
|
|
-# status_code = status.HTTP_207_MULTI_STATUS
|
|
|
|
|
-# else:
|
|
|
|
|
-# status_code = status.HTTP_201_CREATED
|
|
|
|
|
-
|
|
|
|
|
-# return Response(response_data, status=status_code)
|
|
|
|
|
-
|
|
|
|
|
-# except pd.errors.EmptyDataError:
|
|
|
|
|
-# return Response({
|
|
|
|
|
-# 'error': 'The uploaded Excel file is empty or invalid'
|
|
|
|
|
-# }, status=status.HTTP_400_BAD_REQUEST)
|
|
|
|
|
-# except Exception as e:
|
|
|
|
|
-# return Response({
|
|
|
|
|
-# 'error': f'An error occurred while processing the file: {str(e)}'
|
|
|
|
|
-# }, status=status.HTTP_500_INTERNAL_SERVER_ERROR)
|
|
|
|
|
-
|
|
|
|
|
-
|
|
|
|
|
-
|
|
|
|
|
-
|
|
|
|
|
-
|
|
|
|
|
-
|
|
|
|
|
-
|
|
|
|
|
-# import logging
|
|
|
|
|
-# import json
|
|
|
|
|
-# from rest_framework.views import APIView
|
|
|
|
|
-# from rest_framework.response import Response
|
|
|
|
|
-# from rest_framework import status
|
|
|
|
|
-# from rest_framework.parsers import MultiPartParser, FormParser
|
|
|
|
|
-# from django.db import transaction
|
|
|
|
|
-# from django.db.models import Prefetch, F
|
|
|
|
|
-# import pandas as pd
|
|
|
|
|
-# # Import ALL your models
|
|
|
|
|
-# from .models import Product, ProductAttributeValue, ProductType, ProductAttribute, AttributePossibleValue
|
|
|
|
|
-# from .services import ProductAttributeService
|
|
|
|
|
-# from .ocr_service import OCRService
|
|
|
|
|
-# from .visual_processing_service import VisualProcessingService
|
|
|
|
|
-# from openpyxl import Workbook
|
|
|
|
|
-# from openpyxl.styles import Font, PatternFill, Alignment
|
|
|
|
|
-# from django.conf import settings
|
|
|
|
|
-# import os
|
|
|
|
|
-# import threading
|
|
|
|
|
-# from datetime import datetime
|
|
|
|
|
-
|
|
|
|
|
-# # --- Logging Setup ---
|
|
|
|
|
-# # Define log and status file paths in MEDIA_ROOT
|
|
|
|
|
-# LOG_FILE_PATH = os.path.join(settings.MEDIA_ROOT, 'excel_generation.log')
|
|
|
|
|
-# STATUS_FILE_PATH = os.path.join(settings.MEDIA_ROOT, 'excel_generation_status.json')
|
|
|
|
|
-
|
|
|
|
|
-# # Ensure the MEDIA_ROOT exists for files to be saved
|
|
|
|
|
-# if not os.path.exists(settings.MEDIA_ROOT):
|
|
|
|
|
-# os.makedirs(settings.MEDIA_ROOT)
|
|
|
|
|
-
|
|
|
|
|
-# # Configure basic logging
|
|
|
|
|
-# logging.basicConfig(
|
|
|
|
|
-# filename=LOG_FILE_PATH,
|
|
|
|
|
-# level=logging.INFO,
|
|
|
|
|
-# format='%(asctime)s - %(levelname)s - %(message)s'
|
|
|
|
|
-# )
|
|
|
|
|
-# logger = logging.getLogger(__name__)
|
|
|
|
|
-
|
|
|
|
|
-# # -------------------------------------------------------------------------------------------------
|
|
|
|
|
-
|
|
|
|
|
-# def generate_product_excel_background():
|
|
|
|
|
-# """
|
|
|
|
|
-# Function to perform batch attribute extraction for all products and generate an Excel file.
|
|
|
|
|
-# Runs in a background thread to avoid blocking the API response.
|
|
|
|
|
-# Logs success/failure and saves a status file for external monitoring.
|
|
|
|
|
-# """
|
|
|
|
|
-# logger.info(f"[{datetime.now().isoformat()}] Starting background product Excel generation and attribute extraction.")
|
|
|
|
|
-
|
|
|
|
|
-# successful = 0
|
|
|
|
|
-# failed = 0
|
|
|
|
|
-# results = [] # To store detailed extraction results for Excel sheet 2
|
|
|
|
|
-
|
|
|
|
|
-# # Function to write status file (SUCCESS/FAILED)
|
|
|
|
|
-# def write_status(status_type, error_msg=None):
|
|
|
|
|
-# status_data = {
|
|
|
|
|
-# "status": status_type,
|
|
|
|
|
-# "timestamp": datetime.now().isoformat(),
|
|
|
|
|
-# "products_processed": successful + failed,
|
|
|
|
|
-# "products_successful": successful,
|
|
|
|
|
-# "products_failed": failed,
|
|
|
|
|
-# "excel_path": os.path.join(settings.MEDIA_URL, 'generated_products.xlsx') if status_type == "SUCCESS" else None,
|
|
|
|
|
-# "log_path": os.path.join(settings.MEDIA_URL, 'excel_generation.log'),
|
|
|
|
|
-# "error_message": error_msg
|
|
|
|
|
-# }
|
|
|
|
|
-# try:
|
|
|
|
|
-# with open(STATUS_FILE_PATH, 'w') as f:
|
|
|
|
|
-# json.dump(status_data, f, indent=4)
|
|
|
|
|
-# except Exception as e:
|
|
|
|
|
-# logger.exception(f"CRITICAL ERROR: Failed to write status file at {STATUS_FILE_PATH}: {e}")
|
|
|
|
|
-
|
|
|
|
|
-# try:
|
|
|
|
|
-# # 1. PREFETCH all necessary related data to minimize database queries
|
|
|
|
|
-
|
|
|
|
|
-# # Prefetch possible values for mandatory attributes
|
|
|
|
|
-# possible_values_prefetch = Prefetch(
|
|
|
|
|
-# 'attributes',
|
|
|
|
|
-# queryset=ProductAttribute.objects.filter(is_mandatory=True).prefetch_related('possible_values')
|
|
|
|
|
-# )
|
|
|
|
|
-
|
|
|
|
|
-# # Fetch all ProductTypes with their mandatory attributes and possible values
|
|
|
|
|
-# all_product_types = ProductType.objects.prefetch_related(possible_values_prefetch)
|
|
|
|
|
-# product_type_map = {
|
|
|
|
|
-# pt.name: pt for pt in all_product_types
|
|
|
|
|
-# }
|
|
|
|
|
-
|
|
|
|
|
-# # Prepare product_list for batch extraction
|
|
|
|
|
-# all_products = Product.objects.all()
|
|
|
|
|
-# product_list = []
|
|
|
|
|
-
|
|
|
|
|
-# for p in all_products:
|
|
|
|
|
-# # mandatory_attrs will be the dictionary required by the service
|
|
|
|
|
-# mandatory_attrs_dict = {}
|
|
|
|
|
-# product_type_name = p.product_type.strip() if p.product_type else None
|
|
|
|
|
-
|
|
|
|
|
-# if product_type_name and product_type_name in product_type_map:
|
|
|
|
|
-# pt = product_type_map[product_type_name]
|
|
|
|
|
-
|
|
|
|
|
-# # Build the mandatory_attrs dictionary: { "Attribute Name": ["Value 1", "Value 2"], ... }
|
|
|
|
|
-# for attr in pt.attributes.all(): # .all() here works because we used Prefetch for 'attributes'
|
|
|
|
|
-# # attr.possible_values.all() works because we used prefetch_related('possible_values')
|
|
|
|
|
-# mandatory_attrs_dict[attr.name] = [
|
|
|
|
|
-# pv.value for pv in attr.possible_values.all()
|
|
|
|
|
-# ]
|
|
|
|
|
-
|
|
|
|
|
-# product_list.append({
|
|
|
|
|
-# "item_id": p.item_id,
|
|
|
|
|
-# "product_type_name": product_type_name,
|
|
|
|
|
-# "mandatory_attrs": mandatory_attrs_dict # <-- FIX: Pass the dictionary here
|
|
|
|
|
-# })
|
|
|
|
|
-
|
|
|
|
|
-# # Batch settings (using defaults)
|
|
|
|
|
-# model = "llama-3.1-8b-instant"
|
|
|
|
|
-# extract_additional = True
|
|
|
|
|
-# process_image = False
|
|
|
|
|
-# multiple = []
|
|
|
|
|
-# threshold_abs = 0.65
|
|
|
|
|
-# margin = 0.15
|
|
|
|
|
-# use_dynamic_thresholds = True
|
|
|
|
|
-# use_adaptive_margin = True
|
|
|
|
|
-# use_semantic_clustering = True
|
|
|
|
|
-
|
|
|
|
|
-# # Batch extraction logic
|
|
|
|
|
-# item_ids = [p['item_id'] for p in product_list]
|
|
|
|
|
-# products_queryset = Product.objects.filter(item_id__in=item_ids)
|
|
|
|
|
-# product_map = {product.item_id: product for product in products_queryset}
|
|
|
|
|
-# found_ids = set(product_map.keys())
|
|
|
|
|
-
|
|
|
|
|
-# for product_entry in product_list:
|
|
|
|
|
-# item_id = product_entry['item_id']
|
|
|
|
|
-# # FIX: mandatory_attrs is now correctly a dictionary (or an empty dictionary)
|
|
|
|
|
-# mandatory_attrs = product_entry['mandatory_attrs']
|
|
|
|
|
-
|
|
|
|
|
-# if item_id not in found_ids:
|
|
|
|
|
-# failed += 1
|
|
|
|
|
-# results.append({
|
|
|
|
|
-# "product_id": item_id,
|
|
|
|
|
-# "error": "Product not found in database"
|
|
|
|
|
-# })
|
|
|
|
|
-# logger.warning(f"Product {item_id} not found in database. Skipping extraction.")
|
|
|
|
|
-# continue
|
|
|
|
|
-
|
|
|
|
|
-# product = product_map[item_id]
|
|
|
|
|
-
|
|
|
|
|
-# try:
|
|
|
|
|
-# title = product.product_name
|
|
|
|
|
-# short_desc = product.product_short_description
|
|
|
|
|
-# long_desc = product.product_long_description
|
|
|
|
|
-# image_url = product.image_path
|
|
|
|
|
-
|
|
|
|
|
-# ocr_results = None
|
|
|
|
|
-# ocr_text = None
|
|
|
|
|
-# visual_results = None
|
|
|
|
|
-
|
|
|
|
|
-# if process_image and image_url:
|
|
|
|
|
-# logger.info(f"Processing image for product {item_id}...")
|
|
|
|
|
-# # OCR Processing
|
|
|
|
|
-# ocr_service = OCRService()
|
|
|
|
|
-# ocr_results = ocr_service.process_image(image_url)
|
|
|
|
|
-
|
|
|
|
|
-# if ocr_results and ocr_results.get("detected_text"):
|
|
|
|
|
-# # NOTE: Assuming ProductAttributeService.extract_attributes_from_ocr exists
|
|
|
|
|
-# ocr_attrs = ProductAttributeService.extract_attributes_from_ocr(
|
|
|
|
|
-# ocr_results, model
|
|
|
|
|
-# )
|
|
|
|
|
-# ocr_results["extracted_attributes"] = ocr_attrs
|
|
|
|
|
-# ocr_text = "\n".join([
|
|
|
|
|
-# f"{item['text']} (confidence: {item['confidence']:.2f})"
|
|
|
|
|
-# for item in ocr_results["detected_text"]
|
|
|
|
|
-# ])
|
|
|
|
|
-
|
|
|
|
|
-# # Visual Processing
|
|
|
|
|
-# visual_service = VisualProcessingService()
|
|
|
|
|
-# product_type_hint = product.product_type if product.product_type else None
|
|
|
|
|
-# visual_results = visual_service.process_image(image_url, product_type_hint)
|
|
|
|
|
-
|
|
|
|
|
-# if visual_results and visual_results.get('visual_attributes'):
|
|
|
|
|
-# visual_results['visual_attributes'] = ProductAttributeService.format_visual_attributes(
|
|
|
|
|
-# visual_results['visual_attributes']
|
|
|
|
|
-# )
|
|
|
|
|
-# logger.info(f"Image processing done for product {item_id}.")
|
|
|
|
|
-
|
|
|
|
|
-
|
|
|
|
|
-# # Combine product text with source tracking
|
|
|
|
|
-# product_text, source_map = ProductAttributeService.combine_product_text(
|
|
|
|
|
-# title=title,
|
|
|
|
|
-# short_desc=short_desc,
|
|
|
|
|
-# long_desc=long_desc,
|
|
|
|
|
-# ocr_text=ocr_text
|
|
|
|
|
-# )
|
|
|
|
|
-
|
|
|
|
|
-# # Attribute Extraction with source tracking
|
|
|
|
|
-# extracted = ProductAttributeService.extract_attributes(
|
|
|
|
|
-# product_text=product_text,
|
|
|
|
|
-# mandatory_attrs=mandatory_attrs, # <-- This is now the dictionary with possible values
|
|
|
|
|
-# source_map=source_map,
|
|
|
|
|
-# model=model,
|
|
|
|
|
-# extract_additional=extract_additional,
|
|
|
|
|
-# multiple=multiple,
|
|
|
|
|
-# threshold_abs=threshold_abs,
|
|
|
|
|
-# margin=margin,
|
|
|
|
|
-# use_dynamic_thresholds=use_dynamic_thresholds,
|
|
|
|
|
-# use_adaptive_margin=use_adaptive_margin,
|
|
|
|
|
-# use_semantic_clustering=use_semantic_clustering
|
|
|
|
|
-# )
|
|
|
|
|
-
|
|
|
|
|
-# result = {
|
|
|
|
|
-# "product_id": item_id,
|
|
|
|
|
-# "mandatory": extracted.get("mandatory", {}),
|
|
|
|
|
-# "additional": extracted.get("additional", {}),
|
|
|
|
|
-# }
|
|
|
|
|
-
|
|
|
|
|
-# if ocr_results:
|
|
|
|
|
-# result["ocr_results"] = ocr_results
|
|
|
|
|
-
|
|
|
|
|
-# if visual_results:
|
|
|
|
|
-# result["visual_results"] = visual_results
|
|
|
|
|
-
|
|
|
|
|
-# results.append(result)
|
|
|
|
|
-# successful += 1
|
|
|
|
|
-# logger.info(f"Attribute extraction successful for product {item_id}.")
|
|
|
|
|
-
|
|
|
|
|
-# except Exception as e:
|
|
|
|
|
-# failed += 1
|
|
|
|
|
-# results.append({
|
|
|
|
|
-# "product_id": item_id,
|
|
|
|
|
-# "error": str(e)
|
|
|
|
|
-# })
|
|
|
|
|
-# # Original Error: AttributeError: 'list' object has no attribute 'items'
|
|
|
|
|
-# # This should now be fixed, but we keep the robust exception handling.
|
|
|
|
|
-# logger.exception(f"Error during attribute extraction for product {item_id}.")
|
|
|
|
|
-
|
|
|
|
|
-# logger.info(f"Batch extraction phase complete. Successful: {successful}, Failed: {failed}")
|
|
|
|
|
-
|
|
|
|
|
-# # --------------------------------------------------------------------------------
-# # Generate and save the Excel file (Unchanged)
-# # --------------------------------------------------------------------------------
-# wb = Workbook()
-
-# # Sheet 1: Products (from DB)
-# ws_products = wb.active
-# ws_products.title = "Products"
-# products_headers = ['ITEM ID', 'PRODUCT NAME', 'PRODUCT TYPE', 'Product Short Description', 'Product Long Description', 'image_path']
-# header_fill = PatternFill(start_color="366092", end_color="366092", fill_type="solid")
-# header_font = Font(bold=True, color="FFFFFF")
-
-# for col_num, header in enumerate(products_headers, 1):
-# cell = ws_products.cell(row=1, column=col_num)
-# cell.value = header
-# cell.fill = header_fill
-# cell.font = header_font
-# cell.alignment = Alignment(horizontal="center", vertical="center")
-
-# all_products_db = Product.objects.all()
-# for row_num, product in enumerate(all_products_db, 2):
-# ws_products.cell(row=row_num, column=1, value=product.item_id)
-# ws_products.cell(row=row_num, column=2, value=product.product_name)
-# ws_products.cell(row=row_num, column=3, value=product.product_type)
-# ws_products.cell(row=row_num, column=4, value=product.product_short_description)
-# ws_products.cell(row=row_num, column=5, value=product.product_long_description)
-# ws_products.cell(row=row_num, column=6, value=product.image_path)
-
-# # Adjust column widths
-# for col_dim, width in zip(['A', 'B', 'C', 'D', 'E', 'F'], [15, 25, 15, 35, 50, 45]):
-# ws_products.column_dimensions[col_dim].width = width
-
-# # Sheet 2: Attribute_values (item_id, attribute_name, original_value, generated_value)
-# ws_attributes = wb.create_sheet("Attribute_values")
-# attributes_headers = ['item_id', 'attribute_name', 'original_value', 'generated_value']
-# for col_num, header in enumerate(attributes_headers, 1):
-# cell = ws_attributes.cell(row=1, column=col_num)
-# cell.value = header
-# cell.fill = header_fill
-# cell.font = header_font
-# cell.alignment = Alignment(horizontal="center", vertical="center")
-
-# # Fetch all original attributes
-# row_num = 2
-# all_original_attrs = ProductAttributeValue.objects.all()
-# # Create a lookup for original attributes by item_id and attribute_name
-# original_attrs_lookup = {
-# (attr.product.item_id, attr.attribute_name): attr.original_value
-# for attr in all_original_attrs
-# }
-
-# # Add attributes (original and generated)
-# processed_original_keys = set()
-# for res in results:
-# item_id = res["product_id"]
-
-# if "error" in res:
-# # Add existing original attributes for failed products to the sheet
-# for (orig_item_id, orig_attr_name), orig_value in original_attrs_lookup.items():
-# if orig_item_id == item_id:
-# ws_attributes.cell(row=row_num, column=1, value=orig_item_id)
-# ws_attributes.cell(row=row_num, column=2, value=orig_attr_name)
-# ws_attributes.cell(row=row_num, column=3, value=orig_value)
-# ws_attributes.cell(row=row_num, column=4, value=f"Extraction Failed: {res['error']}")
-# processed_original_keys.add((orig_item_id, orig_attr_name))
-# row_num += 1
-# continue
-
-# # Combine all generated attributes (mandatory, additional, OCR, visual)
-# generated_attrs = {}
-# for cat in ["mandatory", "additional"]:
-# attrs = res.get(cat, {})
-# for attr_name, values in attrs.items():
-# for val in values:
-# key = (item_id, attr_name)
-# if key not in generated_attrs:
-# generated_attrs[key] = []
-# generated_attrs[key].append(f"{val['value']} (source: {val['source']})")
-
-# # OCR extracted
-# ocr = res.get("ocr_results")
-# if ocr and "extracted_attributes" in ocr and isinstance(ocr["extracted_attributes"], dict):
-# for attr_name, values in ocr["extracted_attributes"].items():
-# for val in values:
-# key = (item_id, attr_name)
-# if key not in generated_attrs:
-# generated_attrs[key] = []
-# generated_attrs[key].append(f"{val['value']} (source: {val['source']})")
-
-# # Visual extracted
-# visual = res.get("visual_results")
-# if visual and "visual_attributes" in visual:
-# vis_attrs = visual["visual_attributes"]
-# if isinstance(vis_attrs, dict):
-# # Handle dict format where value might be a list of dicts or a single value
-# for attr_name, values in vis_attrs.items():
-# if not isinstance(values, list):
-# values = [{"value": values, "source": "visual"}]
-# for val in values:
-# key = (item_id, attr_name)
-# if key not in generated_attrs:
-# generated_attrs[key] = []
-# generated_attrs[key].append(f"{val['value']} (source: {val.get('source', 'visual')})")
-# elif isinstance(vis_attrs, list):
-# # Handle list of dicts format
-# for item in vis_attrs:
-# attr_name = item.get("attribute_name") or item.get("name")
-# if not attr_name: continue
-# value = item.get("value", "")
-# source = item.get("source", "visual")
-# key = (item_id, attr_name)
-# if key not in generated_attrs:
-# generated_attrs[key] = []
-# generated_attrs[key].append(f"{value} (source: {source})")
-
-
-# # Write attributes to Excel
-# for (attr_item_id, attr_name), gen_values in generated_attrs.items():
-# # Get original value from lookup (if it exists)
-# original_value = original_attrs_lookup.get((attr_item_id, attr_name), "")
-# # Combine multiple generated values into a single string
-# generated_value = "; ".join(gen_values) if gen_values else ""
-# # Write row
-# ws_attributes.cell(row=row_num, column=1, value=attr_item_id)
-# ws_attributes.cell(row=row_num, column=2, value=attr_name)
-# ws_attributes.cell(row=row_num, column=3, value=original_value)
-# ws_attributes.cell(row=row_num, column=4, value=generated_value)
-# processed_original_keys.add((attr_item_id, attr_name))
-# row_num += 1
-
-# # Add original attributes that have no generated values for this item_id
-# for (orig_item_id, orig_attr_name), orig_value in original_attrs_lookup.items():
-# if orig_item_id == item_id and (orig_item_id, orig_attr_name) not in processed_original_keys:
-# ws_attributes.cell(row=row_num, column=1, value=orig_item_id)
-# ws_attributes.cell(row=row_num, column=2, value=orig_attr_name)
-# ws_attributes.cell(row=row_num, column=3, value=orig_value)
-# ws_attributes.cell(row=row_num, column=4, value="") # No generated value
-# processed_original_keys.add((orig_item_id, orig_attr_name))
-# row_num += 1
-
-# # Add original attributes for products not included in the 'results' (e.g. if they didn't exist in product_list)
-# # We assume all products are in product_list, so this step might be redundant, but safe for completeness.
-# for (orig_item_id, orig_attr_name), orig_value in original_attrs_lookup.items():
-# if (orig_item_id, orig_attr_name) not in processed_original_keys:
-# ws_attributes.cell(row=row_num, column=1, value=orig_item_id)
-# ws_attributes.cell(row=row_num, column=2, value=orig_attr_name)
-# ws_attributes.cell(row=row_num, column=3, value=orig_value)
-# ws_attributes.cell(row=row_num, column=4, value="Original value only (Product not processed in batch)")
-# row_num += 1
-
-
-# # Adjust column widths for attributes
-# for col_dim, width in zip(['A', 'B', 'C', 'D'], [15, 35, 50, 50]):
-# ws_attributes.column_dimensions[col_dim].width = width
-
-# # Save the generated Excel (replace existing)
-# save_path = os.path.join(settings.MEDIA_ROOT, 'generated_products.xlsx')
-# wb.save(save_path)
-# logger.info(f"Excel file successfully saved to {save_path}")
-
-# # Write SUCCESS status
-# write_status("SUCCESS")
-# logger.info("Background task finished successfully.")
-
-
-# except Exception as e:
-# # Log the critical error and write FAILED status
-# logger.exception("CRITICAL ERROR during background Excel generation process.")
-# write_status("FAILED", error_msg=str(e))
-
-
-# # -------------------------------------------------------------------------------------------------
-
-# class ProductUploadExcelView(APIView):
-# """
-# POST API to upload an Excel file. (Unchanged)
-# """
-# parser_classes = (MultiPartParser, FormParser)
-
-# def post(self, request, *args, **kwargs):
-# file_obj = request.FILES.get('file')
-# if not file_obj:
-# return Response({'error': 'No file provided'}, status=status.HTTP_400_BAD_REQUEST)
-
-# try:
-# # Read all sheets from Excel file
-# excel_file = pd.ExcelFile(file_obj)
-
-# # Check if required sheets exist
-# if 'Products' not in excel_file.sheet_names:
-# logger.error(f"Upload failed: Missing 'Products' sheet in file.")
-# return Response({
-# 'error': "Missing 'Products' sheet",
-# 'available_sheets': excel_file.sheet_names
-# }, status=status.HTTP_400_BAD_REQUEST)
-
-# # Read Products sheet
-# df_products = pd.read_excel(excel_file, sheet_name='Products')
-# df_products.columns = [c.strip().lower().replace(' ', '_') for c in df_products.columns]
-
-# # Check required columns for Products
-# expected_product_cols = {
-# 'item_id', 'product_name', 'product_long_description',
-# 'product_short_description', 'product_type', 'image_path'
-# }
-
-# if not expected_product_cols.issubset(df_products.columns):
-# logger.error(f"Upload failed: Missing required columns in Products sheet.")
-# return Response({
-# 'error': 'Missing required columns in Products sheet',
-# 'required_columns': list(expected_product_cols),
-# 'found_columns': list(df_products.columns)
-# }, status=status.HTTP_400_BAD_REQUEST)
-
-# # Read Attribute_values sheet if it exists
-# df_attributes = None
-# has_attributes_sheet = 'Attribute_values' in excel_file.sheet_names
-
-# if has_attributes_sheet:
-# df_attributes = pd.read_excel(excel_file, sheet_name='Attribute_values')
-# df_attributes.columns = [c.strip().lower().replace(' ', '_') for c in df_attributes.columns]
-
-# # Check required columns for Attribute_values
-# expected_attr_cols = {'item_id', 'attribute_name', 'original_value'}
-# if not expected_attr_cols.issubset(df_attributes.columns):
-# logger.error(f"Upload failed: Missing required columns in Attribute_values sheet.")
-# return Response({
-# 'error': 'Missing required columns in Attribute_values sheet',
-# 'required_columns': list(expected_attr_cols),
-# 'found_columns': list(df_attributes.columns)
-# }, status=status.HTTP_400_BAD_REQUEST)
-
-# # Initialize counters
-# products_created = 0
-# products_updated = 0
-# attributes_created = 0
-# attributes_updated = 0
-# products_failed = 0
-# attributes_failed = 0
-# errors = []
-
-# # Use transaction to ensure atomicity
-# with transaction.atomic():
-# # Process Products sheet
-# for idx, row in df_products.iterrows():
-# item_id = str(row.get('item_id', '')).strip()
-# product_type = str(row.get('product_type', '')).strip()
-
-# if not item_id:
-# products_failed += 1
-# errors.append(f"Products Row {idx + 2}: Missing item_id")
-# continue
-
-# try:
-# # Auto-create ProductType if provided and doesn't exist
-# if product_type:
-# ProductType.objects.get_or_create(name=product_type)
-
-# defaults = {
-# 'product_name': str(row.get('product_name', '')),
-# 'product_long_description': str(row.get('product_long_description', '')),
-# 'product_short_description': str(row.get('product_short_description', '')),
-# 'product_type': product_type,
-# 'image_path': str(row.get('image_path', '')),
-# }
-
-# obj, created = Product.objects.update_or_create(
-# item_id=item_id,
-# defaults=defaults
-# )
-
-# if created:
-# products_created += 1
-# else:
-# products_updated += 1
-# except Exception as e:
-# products_failed += 1
-# errors.append(f"Products Row {idx + 2} (item_id: {item_id}): {str(e)}")
-# logger.error(f"Error processing product {item_id} in Products sheet: {e}")
-
-
-# # Process Attribute_values sheet if it exists
-# if has_attributes_sheet and df_attributes is not None:
-# # Group by item_id to optimize lookups
-# item_ids_in_attrs = df_attributes['item_id'].astype(str).unique()
-
-# # Fetch all products at once
-# existing_products = {
-# p.item_id: p
-# for p in Product.objects.filter(item_id__in=item_ids_in_attrs)
-# }
-
-# for idx, row in df_attributes.iterrows():
-# item_id = str(row.get('item_id', '')).strip()
-# attribute_name = str(row.get('attribute_name', '')).strip()
-# original_value = str(row.get('original_value', '')).strip()
-
-# if not item_id or not attribute_name:
-# attributes_failed += 1
-# errors.append(
-# f"Attribute_values Row {idx + 2}: Missing item_id or attribute_name"
-# )
-# continue
-
-# # Check if product exists
-# product = existing_products.get(item_id)
-# if not product:
-# attributes_failed += 1
-# errors.append(
-# f"Attribute_values Row {idx + 2}: Product with item_id '{item_id}' not found. "
-# "Make sure it exists in Products sheet."
-# )
-# continue
-
-# try:
-# attr_obj, created = ProductAttributeValue.objects.update_or_create(
-# product=product,
-# attribute_name=attribute_name,
-# defaults={'original_value': original_value}
-# )
-
-# if created:
-# attributes_created += 1
-# else:
-# attributes_updated += 1
-# except Exception as e:
-# attributes_failed += 1
-# errors.append(
-# f"Attribute_values Row {idx + 2} "
-# f"(item_id: {item_id}, attribute: {attribute_name}): {str(e)}"
-# )
-# logger.error(f"Error processing attribute {attribute_name} for product {item_id}: {e}")
-
-# # Prepare response data
-# response_data = {
-# 'message': 'Upload completed',
-# 'products': {
-# 'created': products_created,
-# 'updated': products_updated,
-# 'failed': products_failed,
-# 'total_processed': products_created + products_updated + products_failed
-# },
-# 'attribute_values': {
-# 'created': attributes_created,
-# 'updated': attributes_updated,
-# 'failed': attributes_failed,
-# 'total_processed': attributes_created + attributes_updated + attributes_failed
-# } if has_attributes_sheet else {'message': 'Attribute_values sheet not found in Excel file'},
-# 'generated_excel_status': 'Excel generation started in the background.'
-# }
-
-# if errors:
-# response_data['errors'] = errors[:50]
-# if len(errors) > 50:
-# response_data['errors'].append(f"... and {len(errors) - 50} more errors")
-
-# # Determine status code for upload
-# upload_status = status.HTTP_201_CREATED if products_failed == 0 and attributes_failed == 0 else status.HTTP_207_MULTI_STATUS
-
-# # Start background thread for Excel generation if upload was successful
-# if products_failed == 0 and attributes_failed == 0:
-# logger.info("API call successful. Triggering background Excel generation thread.")
-# threading.Thread(target=generate_product_excel_background, daemon=True).start()
-
-# # Update response to provide monitoring paths
-# response_data['generated_excel_status'] = 'Background Excel generation triggered successfully.'
-# response_data['monitoring'] = {
-# 'excel_file': os.path.join(settings.MEDIA_URL, 'generated_products.xlsx'),
-# 'status_file': os.path.join(settings.MEDIA_URL, 'excel_generation_status.json'),
-# 'log_file': os.path.join(settings.MEDIA_URL, 'excel_generation.log'),
-# 'note': 'These files will be available once the background process completes.'
-# }
-# else:
-# logger.warning(f"API call finished with errors ({products_failed} products, {attributes_failed} attributes). Not triggering background excel generation.")
-# response_data['generated_excel_status'] = 'Background Excel generation was NOT triggered due to upload errors. Fix upload errors and re-upload.'
-
-
-# return Response(response_data, status=upload_status)
-
-# except pd.errors.EmptyDataError:
-# logger.error('The uploaded Excel file is empty or invalid.')
-# return Response({
-# 'error': 'The uploaded Excel file is empty or invalid'
-# }, status=status.HTTP_400_BAD_REQUEST)
-# except Exception as e:
-# logger.exception(f'An unexpected error occurred while processing the file.')
-# return Response({
-# 'error': f'An unexpected error occurred while processing the file: {str(e)}'
-# }, status=status.HTTP_500_INTERNAL_SERVER_ERROR)