
optimized code

Harshit Pathak 3 months ago
parent
commit
780e17ac26

Changes are not shown because the file size is too large.
+ 884 - 1631
attr_extraction/services.py


+ 375 - 989
attr_extraction/views.py

@@ -132,11 +132,14 @@ class ExtractProductAttributesView(APIView):
         return Response(result, status=status.HTTP_200_OK)
 
 
+
+# Replace the BatchExtractProductAttributesView in your views.py with this updated version
+
 # class BatchExtractProductAttributesView(APIView):
 #     """
 #     API endpoint to extract product attributes for multiple products in batch.
 #     Uses item-specific mandatory_attrs with source tracking.
-#     Returns attributes in array format: [{"value": "...", "source": "..."}]
+#     Returns attributes in array format with original_value field.
 #     Includes OCR and Visual Processing results.
 #     """
 
@@ -147,15 +150,6 @@ class ExtractProductAttributesView(APIView):
 
 #         validated_data = serializer.validated_data
         
-#         # DEBUG: Print what we received
-#         print("\n" + "="*80)
-#         print("BATCH REQUEST - RECEIVED DATA")
-#         print("="*80)
-#         print(f"Raw request data keys: {request.data.keys()}")
-#         print(f"Multiple field in request: {request.data.get('multiple')}")
-#         print(f"Validated multiple field: {validated_data.get('multiple')}")
-#         print("="*80 + "\n")
-        
 #         # Get batch-level settings
 #         product_list = validated_data.get("products", [])
 #         model = validated_data.get("model")
@@ -168,27 +162,33 @@ class ExtractProductAttributesView(APIView):
 #         use_adaptive_margin = validated_data.get("use_adaptive_margin", True)
 #         use_semantic_clustering = validated_data.get("use_semantic_clustering", True)
         
-#         # DEBUG: Print extracted settings
-#         print(f"Extracted multiple parameter: {multiple}")
-#         print(f"Type: {type(multiple)}")
-        
 #         # Extract all item_ids to query the database efficiently
 #         item_ids = [p['item_id'] for p in product_list] 
         
 #         # Fetch all products in one query
 #         products_queryset = Product.objects.filter(item_id__in=item_ids)
-        
-#         # Create a dictionary for easy lookup: item_id -> Product object
 #         product_map = {product.item_id: product for product in products_queryset}
 #         found_ids = set(product_map.keys())
         
+#         # Fetch all original attribute values for these products in one query
+#         original_values_qs = ProductAttributeValue.objects.filter(
+#             product__item_id__in=item_ids
+#         ).select_related('product')
+        
+#         # Create a nested dictionary: {item_id: {attribute_name: original_value}}
+#         original_values_map = {}
+#         for attr_val in original_values_qs:
+#             item_id = attr_val.product.item_id
+#             if item_id not in original_values_map:
+#                 original_values_map[item_id] = {}
+#             original_values_map[item_id][attr_val.attribute_name] = attr_val.original_value
+        
 #         results = []
 #         successful = 0
 #         failed = 0
 
 #         for product_entry in product_list:
 #             item_id = product_entry['item_id']
-#             # Get item-specific mandatory attributes
 #             mandatory_attrs = product_entry['mandatory_attrs'] 
 
 #             if item_id not in found_ids:
@@ -206,7 +206,7 @@ class ExtractProductAttributesView(APIView):
 #                 short_desc = product.product_short_description
 #                 long_desc = product.product_long_description
 #                 image_url = product.image_path
-#                 # image_url = "https://images.unsplash.com/photo-1595777457583-95e059d581b8"
+                
 #                 ocr_results = None
 #                 ocr_text = None
 #                 visual_results = None
@@ -216,7 +216,6 @@ class ExtractProductAttributesView(APIView):
 #                     # OCR Processing
 #                     ocr_service = OCRService()
 #                     ocr_results = ocr_service.process_image(image_url)
-#                     print(f"OCR results for {item_id}: {ocr_results}")
                     
 #                     if ocr_results and ocr_results.get("detected_text"):
 #                         ocr_attrs = ProductAttributeService.extract_attributes_from_ocr(
@@ -232,7 +231,6 @@ class ExtractProductAttributesView(APIView):
 #                     visual_service = VisualProcessingService()
 #                     product_type_hint = product.product_type if hasattr(product, 'product_type') else None
 #                     visual_results = visual_service.process_image(image_url, product_type_hint)
-#                     print(f"Visual results for {item_id}: {visual_results.get('visual_attributes', {})}")
                     
 #                     # Format visual attributes to array format with source tracking
 #                     if visual_results and visual_results.get('visual_attributes'):
@@ -248,10 +246,6 @@ class ExtractProductAttributesView(APIView):
 #                     ocr_text=ocr_text
 #                 )
 
-#                 # DEBUG: Print before extraction
-#                 print(f"\n>>> Extracting for product {item_id}")
-#                 print(f"    Passing multiple: {multiple}")
-
 #                 # Attribute Extraction with source tracking (returns array format)
 #                 extracted = ProductAttributeService.extract_attributes(
 #                     product_text=product_text,
@@ -267,6 +261,25 @@ class ExtractProductAttributesView(APIView):
 #                     use_semantic_clustering=use_semantic_clustering
 #                 )
 
+#                 # Add original_value to each extracted attribute
+#                 original_attrs = original_values_map.get(item_id, {})
+                
+#                 # Process mandatory attributes
+#                 for attr_name, attr_values in extracted.get("mandatory", {}).items():
+#                     if isinstance(attr_values, list):
+#                         for attr_obj in attr_values:
+#                             if isinstance(attr_obj, dict):
+#                                 # Add original_value if it exists
+#                                 attr_obj["original_value"] = original_attrs.get(attr_name, "")
+                
+#                 # Process additional attributes
+#                 for attr_name, attr_values in extracted.get("additional", {}).items():
+#                     if isinstance(attr_values, list):
+#                         for attr_obj in attr_values:
+#                             if isinstance(attr_obj, dict):
+#                                 # Add original_value if it exists
+#                                 attr_obj["original_value"] = original_attrs.get(attr_name, "")
+
 #                 result = {
 #                     "product_id": product.item_id,
 #                     "mandatory": extracted.get("mandatory", {}),
@@ -305,51 +318,244 @@ class ExtractProductAttributesView(APIView):
 #         return Response(batch_result, status=status.HTTP_200_OK)
 
 
+# views.py - OPTIMIZED WITHOUT REDIS/CELERY
 
+# class BatchExtractProductAttributesView(APIView):
+#     """
+#     Optimized batch extraction using ThreadPoolExecutor (built-in Python)
+#     """
 
-# Replace the BatchExtractProductAttributesView in your views.py with this updated version
+#     def post(self, request):
+#         serializer = BatchProductRequestSerializer(data=request.data)
+#         if not serializer.is_valid():
+#             return Response({"error": serializer.errors}, status=status.HTTP_400_BAD_REQUEST)
+
+#         validated_data = serializer.validated_data
+#         product_list = validated_data.get("products", [])
+        
+#         # OPTIMIZATION 1: Single optimized database query
+#         item_ids = [p['item_id'] for p in product_list]
+#         products_queryset = Product.objects.filter(
+#             item_id__in=item_ids
+#         ).prefetch_related('attribute_values')  # Single query!
+        
+#         product_map = {product.item_id: product for product in products_queryset}
+        
+#         # OPTIMIZATION 2: Prefetch ALL original attribute values in ONE query
+#         original_values_qs = ProductAttributeValue.objects.filter(
+#             product__item_id__in=item_ids
+#         ).select_related('product')
+        
+#         original_values_map = {}
+#         for attr_val in original_values_qs:
+#             item_id = attr_val.product.item_id
+#             if item_id not in original_values_map:
+#                 original_values_map[item_id] = {}
+#             original_values_map[item_id][attr_val.attribute_name] = attr_val.original_value
+        
+#         # Extract settings
+#         model = validated_data.get("model")
+#         extract_additional = validated_data.get("extract_additional", True)
+#         process_image = validated_data.get("process_image", True)
+#         multiple = validated_data.get("multiple", [])
+#         threshold_abs = validated_data.get("threshold_abs", 0.65)
+#         margin = validated_data.get("margin", 0.15)
+#         use_dynamic_thresholds = validated_data.get("use_dynamic_thresholds", True)
+#         use_adaptive_margin = validated_data.get("use_adaptive_margin", True)
+#         use_semantic_clustering = validated_data.get("use_semantic_clustering", True)
+        
+#         results = []
+#         successful = 0
+#         failed = 0
+        
+#         # OPTIMIZATION 3: Initialize services once
+#         ocr_service = OCRService() if process_image else None
+#         visual_service = VisualProcessingService() if process_image else None
+
+#         # OPTIMIZATION 4: Process in parallel using ThreadPoolExecutor
+#         def process_single_product(product_entry):
+#             """Process a single product (runs in parallel)"""
+#             item_id = product_entry['item_id']
+#             mandatory_attrs = product_entry['mandatory_attrs']
+
+#             if item_id not in product_map:
+#                 return {
+#                     "product_id": item_id,
+#                     "error": "Product not found in database"
+#                 }, False
+
+#             product = product_map[item_id]
+            
+#             try:
+#                 title = product.product_name
+#                 short_desc = product.product_short_description
+#                 long_desc = product.product_long_description
+#                 image_url = product.image_path
+                
+#                 ocr_results = None
+#                 ocr_text = None
+#                 visual_results = None
+
+#                 # Image processing (if enabled)
+#                 if process_image and image_url:
+#                     if ocr_service:
+#                         ocr_results = ocr_service.process_image(image_url)
+                        
+#                         if ocr_results and ocr_results.get("detected_text"):
+#                             ocr_attrs = ProductAttributeService.extract_attributes_from_ocr(
+#                                 ocr_results, model
+#                             )
+#                             ocr_results["extracted_attributes"] = ocr_attrs
+#                             ocr_text = "\n".join([
+#                                 f"{item['text']} (confidence: {item['confidence']:.2f})"
+#                                 for item in ocr_results["detected_text"]
+#                             ])
+                    
+#                     if visual_service:
+#                         product_type_hint = product.product_type if hasattr(product, 'product_type') else None
+#                         visual_results = visual_service.process_image(image_url, product_type_hint)
+                        
+#                         if visual_results and visual_results.get('visual_attributes'):
+#                             visual_results['visual_attributes'] = ProductAttributeService.format_visual_attributes(
+#                                 visual_results['visual_attributes']
+#                             )
+
+#                 # Combine product text with source tracking
+#                 product_text, source_map = ProductAttributeService.combine_product_text(
+#                     title=title,
+#                     short_desc=short_desc,
+#                     long_desc=long_desc,
+#                     ocr_text=ocr_text
+#                 )
+
+#                 # Extract attributes (WITH CACHING ENABLED)
+#                 extracted = ProductAttributeService.extract_attributes(
+#                     product_text=product_text,
+#                     mandatory_attrs=mandatory_attrs,
+#                     source_map=source_map,
+#                     model=model,
+#                     extract_additional=extract_additional,
+#                     multiple=multiple,
+#                     threshold_abs=threshold_abs,
+#                     margin=margin,
+#                     use_dynamic_thresholds=use_dynamic_thresholds,
+#                     use_adaptive_margin=use_adaptive_margin,
+#                     use_semantic_clustering=use_semantic_clustering,
+#                     use_cache=True  # Enable caching!
+#                 )
+
+#                 # Add original values
+#                 original_attrs = original_values_map.get(item_id, {})
+                
+#                 for attr_name, attr_values in extracted.get("mandatory", {}).items():
+#                     if isinstance(attr_values, list):
+#                         for attr_obj in attr_values:
+#                             if isinstance(attr_obj, dict):
+#                                 attr_obj["original_value"] = original_attrs.get(attr_name, "")
+                
+#                 for attr_name, attr_values in extracted.get("additional", {}).items():
+#                     if isinstance(attr_values, list):
+#                         for attr_obj in attr_values:
+#                             if isinstance(attr_obj, dict):
+#                                 attr_obj["original_value"] = original_attrs.get(attr_name, "")
+
+#                 result = {
+#                     "product_id": product.item_id,
+#                     "mandatory": extracted.get("mandatory", {}),
+#                     "additional": extracted.get("additional", {}),
+#                 }
+
+#                 if ocr_results:
+#                     result["ocr_results"] = ocr_results
+                
+#                 if visual_results:
+#                     result["visual_results"] = visual_results
+
+#                 return result, True
+
+#             except Exception as e:
+#                 return {
+#                     "product_id": item_id,
+#                     "error": str(e)
+#                 }, False
+
+#         # OPTIMIZATION 5: Use ThreadPoolExecutor for parallel processing
+#         import concurrent.futures
+#         max_workers = min(10, len(product_list))  # Up to 10 parallel workers
+        
+#         with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
+#             # Submit all tasks
+#             future_to_product = {
+#                 executor.submit(process_single_product, product): product
+#                 for product in product_list
+#             }
+            
+#             # Collect results as they complete
+#             for future in concurrent.futures.as_completed(future_to_product):
+#                 try:
+#                     result, success = future.result()
+#                     results.append(result)
+#                     if success:
+#                         successful += 1
+#                     else:
+#                         failed += 1
+#                 except Exception as e:
+#                     failed += 1
+#                     logger.error(f"Unexpected error: {str(e)}")
+#                     results.append({
+#                         "product_id": "unknown",
+#                         "error": str(e)
+#                     })
+
+#         batch_result = {
+#             "results": results,
+#             "total_products": len(product_list),
+#             "successful": successful,
+#             "failed": failed
+#         }
+
+#         response_serializer = BatchProductResponseSerializer(data=batch_result)
+#         if response_serializer.is_valid():
+#             return Response(response_serializer.data, status=status.HTTP_200_OK)
+
+#         return Response(batch_result, status=status.HTTP_200_OK)
+
+
+# ==================== OPTIMIZED BATCH VIEW ====================
+import concurrent.futures
 
 class BatchExtractProductAttributesView(APIView):
     """
-    API endpoint to extract product attributes for multiple products in batch.
-    Uses item-specific mandatory_attrs with source tracking.
-    Returns attributes in array format with original_value field.
-    Includes OCR and Visual Processing results.
+    ⚡ PERFORMANCE OPTIMIZED: Batch extraction with intelligent parallelization
+    Expected performance: 10 products in 30-60 seconds (with image processing)
     """
 
     def post(self, request):
+        import time
+        start_time = time.time()
+        
         serializer = BatchProductRequestSerializer(data=request.data)
         if not serializer.is_valid():
             return Response({"error": serializer.errors}, status=status.HTTP_400_BAD_REQUEST)
 
         validated_data = serializer.validated_data
-        
-        # Get batch-level settings
         product_list = validated_data.get("products", [])
-        model = validated_data.get("model")
-        extract_additional = validated_data.get("extract_additional", True)
-        process_image = validated_data.get("process_image", True)
-        multiple = validated_data.get("multiple", [])
-        threshold_abs = validated_data.get("threshold_abs", 0.65)
-        margin = validated_data.get("margin", 0.15)
-        use_dynamic_thresholds = validated_data.get("use_dynamic_thresholds", True)
-        use_adaptive_margin = validated_data.get("use_adaptive_margin", True)
-        use_semantic_clustering = validated_data.get("use_semantic_clustering", True)
         
-        # Extract all item_ids to query the database efficiently
-        item_ids = [p['item_id'] for p in product_list] 
+        logger.info(f"🚀 Starting batch processing for {len(product_list)} products")
+        
+        # ==================== OPTIMIZATION 1: Bulk DB Query ====================
+        item_ids = [p['item_id'] for p in product_list]
+        products_queryset = Product.objects.filter(
+            item_id__in=item_ids
+        ).prefetch_related('attribute_values')
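+        # NOTE: assumes Product exposes a reverse accessor named 'attribute_values'
+        # (related_name on ProductAttributeValue); the explicit query below does not rely on it.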
         
-        # Fetch all products in one query
-        products_queryset = Product.objects.filter(item_id__in=item_ids)
         product_map = {product.item_id: product for product in products_queryset}
-        found_ids = set(product_map.keys())
         
-        # Fetch all original attribute values for these products in one query
+        # Prefetch ALL original attribute values in ONE query
         original_values_qs = ProductAttributeValue.objects.filter(
             product__item_id__in=item_ids
         ).select_related('product')
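        # select_related('product') avoids one extra query per row when reading
        # attr_val.product.item_id in the loop below.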
         
-        # Create a nested dictionary: {item_id: {attribute_name: original_value}}
         original_values_map = {}
         for attr_val in original_values_qs:
             item_id = attr_val.product.item_id
@@ -357,25 +563,53 @@ class BatchExtractProductAttributesView(APIView):
                 original_values_map[item_id] = {}
             original_values_map[item_id][attr_val.attribute_name] = attr_val.original_value
         
+        logger.info(f"✓ Loaded {len(product_map)} products from database")
+        
+        # Extract settings
+        model = validated_data.get("model")
+        extract_additional = validated_data.get("extract_additional", True)
+        process_image = validated_data.get("process_image", True)
+        multiple = validated_data.get("multiple", [])
+        threshold_abs = validated_data.get("threshold_abs", 0.65)
+        margin = validated_data.get("margin", 0.15)
+        use_dynamic_thresholds = validated_data.get("use_dynamic_thresholds", True)
+        use_adaptive_margin = validated_data.get("use_adaptive_margin", True)
+        use_semantic_clustering = validated_data.get("use_semantic_clustering", True)
+        
         results = []
         successful = 0
         failed = 0
+        
+        # ==================== OPTIMIZATION 2: Conditional Service Init ====================
+        # Only initialize if processing images
+        ocr_service = None
+        visual_service = None
+        
+        if process_image:
+            from .ocr_service import OCRService
+            from .visual_processing_service import VisualProcessingService
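+            # Lazy imports: OCR/visual dependencies are only loaded when process_image is True.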
+            ocr_service = OCRService()
+            visual_service = VisualProcessingService()
+            logger.info("✓ Image processing services initialized")
 
-        for product_entry in product_list:
+        # ==================== OPTIMIZATION 3: Smart Parallelization ====================
+        def process_single_product(product_entry):
+            """Process a single product (runs in parallel)"""
+            import time
+            product_start = time.time()
+            
             item_id = product_entry['item_id']
-            mandatory_attrs = product_entry['mandatory_attrs'] 
+            mandatory_attrs = product_entry['mandatory_attrs']
 
-            if item_id not in found_ids:
-                failed += 1
-                results.append({
+            if item_id not in product_map:
+                return {
                     "product_id": item_id,
                     "error": "Product not found in database"
-                })
-                continue
+                }, False
 
             product = product_map[item_id]
             
-            try: 
+            try:
                 title = product.product_name
                 short_desc = product.product_short_description
                 long_desc = product.product_long_description
@@ -385,32 +619,29 @@ class BatchExtractProductAttributesView(APIView):
                 ocr_text = None
                 visual_results = None
 
-                # Image Processing Logic
+                # ⚡ SKIP IMAGE PROCESSING IF DISABLED (HUGE TIME SAVER)
                 if process_image and image_url:
-                    # OCR Processing
-                    ocr_service = OCRService()
-                    ocr_results = ocr_service.process_image(image_url)
-                    
-                    if ocr_results and ocr_results.get("detected_text"):
-                        ocr_attrs = ProductAttributeService.extract_attributes_from_ocr(
-                            ocr_results, model
-                        )
-                        ocr_results["extracted_attributes"] = ocr_attrs
-                        ocr_text = "\n".join([
-                            f"{item['text']} (confidence: {item['confidence']:.2f})"
-                            for item in ocr_results["detected_text"]
-                        ])
+                    if ocr_service:
+                        ocr_results = ocr_service.process_image(image_url)
+                        
+                        if ocr_results and ocr_results.get("detected_text"):
+                            ocr_attrs = ProductAttributeService.extract_attributes_from_ocr(
+                                ocr_results, model
+                            )
+                            ocr_results["extracted_attributes"] = ocr_attrs
+                            ocr_text = "\n".join([
+                                f"{item['text']} (confidence: {item['confidence']:.2f})"
+                                for item in ocr_results["detected_text"]
+                            ])
                     
-                    # Visual Processing
-                    visual_service = VisualProcessingService()
-                    product_type_hint = product.product_type if hasattr(product, 'product_type') else None
-                    visual_results = visual_service.process_image(image_url, product_type_hint)
-                    
-                    # Format visual attributes to array format with source tracking
-                    if visual_results and visual_results.get('visual_attributes'):
-                        visual_results['visual_attributes'] = ProductAttributeService.format_visual_attributes(
-                            visual_results['visual_attributes']
-                        )
+                    if visual_service:
+                        product_type_hint = product.product_type if hasattr(product, 'product_type') else None
+                        visual_results = visual_service.process_image(image_url, product_type_hint)
+                        
+                        if visual_results and visual_results.get('visual_attributes'):
+                            visual_results['visual_attributes'] = ProductAttributeService.format_visual_attributes(
+                                visual_results['visual_attributes']
+                            )
 
                 # Combine product text with source tracking
                 product_text, source_map = ProductAttributeService.combine_product_text(
@@ -420,7 +651,7 @@ class BatchExtractProductAttributesView(APIView):
                     ocr_text=ocr_text
                 )
 
-                # Attribute Extraction with source tracking (returns array format)
+                # ⚡ EXTRACT ATTRIBUTES WITH CACHING ENABLED
                 extracted = ProductAttributeService.extract_attributes(
                     product_text=product_text,
                     mandatory_attrs=mandatory_attrs,
@@ -432,26 +663,23 @@ class BatchExtractProductAttributesView(APIView):
                     margin=margin,
                     use_dynamic_thresholds=use_dynamic_thresholds,
                     use_adaptive_margin=use_adaptive_margin,
-                    use_semantic_clustering=use_semantic_clustering
+                    use_semantic_clustering=use_semantic_clustering,
+                    use_cache=True  # ⚡ CRITICAL: Enable caching
                 )
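                # use_cache=True assumes ProductAttributeService.extract_attributes accepts this
                # keyword (presumably added in the services.py changes in this same commit).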
 
-                # Add original_value to each extracted attribute
+                # Add original values
                 original_attrs = original_values_map.get(item_id, {})
                 
-                # Process mandatory attributes
                 for attr_name, attr_values in extracted.get("mandatory", {}).items():
                     if isinstance(attr_values, list):
                         for attr_obj in attr_values:
                             if isinstance(attr_obj, dict):
-                                # Add original_value if it exists
                                 attr_obj["original_value"] = original_attrs.get(attr_name, "")
                 
-                # Process additional attributes
                 for attr_name, attr_values in extracted.get("additional", {}).items():
                     if isinstance(attr_values, list):
                         for attr_obj in attr_values:
                             if isinstance(attr_obj, dict):
-                                # Add original_value if it exists
                                 attr_obj["original_value"] = original_attrs.get(attr_name, "")
 
                 result = {
@@ -460,29 +688,82 @@ class BatchExtractProductAttributesView(APIView):
                     "additional": extracted.get("additional", {}),
                 }
 
-                # Attach OCR results if available
                 if ocr_results:
                     result["ocr_results"] = ocr_results
                 
-                # Attach Visual Processing results if available
                 if visual_results:
                     result["visual_results"] = visual_results
+                
+                processing_time = time.time() - product_start
+                logger.info(f"✓ Processed {item_id} in {processing_time:.2f}s")
 
-                results.append(result)
-                successful += 1
+                return result, True
 
             except Exception as e:
-                failed += 1
-                results.append({
+                logger.error(f"❌ Error processing {item_id}: {str(e)}")
+                return {
                     "product_id": item_id,
                     "error": str(e)
-                })
+                }, False
+
+        # ==================== OPTIMIZATION 4: Parallel Execution ====================
+        # Adjust workers based on whether image processing is enabled
+        max_workers = min(3 if process_image else 10, len(product_list))
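+        # Heavier OCR/visual work per product -> fewer workers; ThreadPoolExecutor requires
+        # max_workers >= 1, so this assumes the batch contains at least one product.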
+        
+        logger.info(f"⚡ Using {max_workers} parallel workers")
+        
+        with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
+            # Submit all tasks
+            future_to_product = {
+                executor.submit(process_single_product, product): product
+                for product in product_list
+            }
+            
+            # Collect results as they complete
+            for future in concurrent.futures.as_completed(future_to_product):
+                try:
+                    result, success = future.result()
+                    results.append(result)
+                    if success:
+                        successful += 1
+                    else:
+                        failed += 1
+                except Exception as e:
+                    failed += 1
+                    logger.error(f"❌ Future execution error: {str(e)}")
+                    results.append({
+                        "product_id": "unknown",
+                        "error": str(e)
+                    })
+
+        total_time = time.time() - start_time
+        
+        # Get cache statistics
+        cache_stats = ProductAttributeService.get_cache_stats()
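+        # Assumes get_cache_stats() returns a dict shaped like
+        # {"embedding_cache": {"hit_rate_percent": ...}}; adjust the keys below if it differs.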
+        
+        logger.info(f"""
+🎉 BATCH PROCESSING COMPLETE
+━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
+  Total products: {len(product_list)}
+  Successful: {successful}
+  Failed: {failed}
+  Total time: {total_time:.2f}s
+  Avg time/product: {total_time/len(product_list):.2f}s
+  Cache hit rate: {cache_stats['embedding_cache']['hit_rate_percent']:.1f}%
+━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
+        """)
 
         batch_result = {
             "results": results,
             "total_products": len(product_list),
             "successful": successful,
-            "failed": failed
+            "failed": failed,
+            "performance": {
+                "total_time_seconds": round(total_time, 2),
+                "avg_time_per_product": round(total_time / len(product_list), 2),
+                "workers_used": max_workers
+            },
+            "cache_stats": cache_stats
         }
 
         response_serializer = BatchProductResponseSerializer(data=batch_result)
@@ -493,7 +774,6 @@ class BatchExtractProductAttributesView(APIView):
 
 
 
-
 class ProductListView(APIView):
     """
     GET API to list all products with details
@@ -512,77 +792,6 @@ import pandas as pd
 from .models import Product
 
 
-# class ProductUploadExcelView(APIView):
-#     """
-#     POST API to upload an Excel file and add/update data in Product model.
-#     - Creates new records if they don't exist.
-#     - Updates existing ones (e.g., when image_path or other fields change).
-#     """
-#     parser_classes = (MultiPartParser, FormParser)
-
-#     def post(self, request, *args, **kwargs):
-#         file_obj = request.FILES.get('file')
-#         if not file_obj:
-#             return Response({'error': 'No file provided'}, status=status.HTTP_400_BAD_REQUEST)
-
-#         try:
-#             # Read Excel into DataFrame
-#             df = pd.read_excel(file_obj)
-#             df.columns = [c.strip().lower().replace(' ', '_') for c in df.columns]
-
-#             expected_cols = {
-#                 'item_id',
-#                 'product_name',
-#                 'product_long_description',
-#                 'product_short_description',
-#                 'product_type',
-#                 'image_path'
-#             }
-
-#             # Check required columns
-#             if not expected_cols.issubset(df.columns):
-#                 return Response({
-#                     'error': 'Missing required columns',
-#                     'required_columns': list(expected_cols)
-#                 }, status=status.HTTP_400_BAD_REQUEST)
-
-#             created_count = 0
-#             updated_count = 0
-
-#             # Loop through rows and update or create
-#             for _, row in df.iterrows():
-#                 item_id = str(row.get('item_id', '')).strip()
-#                 if not item_id:
-#                     continue  # Skip rows without an item_id
-
-#                 defaults = {
-#                     'product_name': row.get('product_name', ''),
-#                     'product_long_description': row.get('product_long_description', ''),
-#                     'product_short_description': row.get('product_short_description', ''),
-#                     'product_type': row.get('product_type', ''),
-#                     'image_path': row.get('image_path', ''),
-#                 }
-
-#                 obj, created = Product.objects.update_or_create(
-#                     item_id=item_id,
-#                     defaults=defaults
-#                 )
-
-#                 if created:
-#                     created_count += 1
-#                 else:
-#                     updated_count += 1
-
-#             return Response({
-#                 'message': f'Upload successful.',
-#                 'created': f'{created_count} new records added.',
-#                 'updated': f'{updated_count} existing records updated.'
-#             }, status=status.HTTP_201_CREATED)
-
-#         except Exception as e:
-#             return Response({'error': str(e)}, status=status.HTTP_500_INTERNAL_SERVER_ERROR)
-# Replace the ProductUploadExcelView in your views.py with this updated version
-
 from rest_framework.views import APIView
 from rest_framework.response import Response
 from rest_framework import status
@@ -592,835 +801,12 @@ import pandas as pd
 from .models import Product, ProductAttributeValue
 
 
-# class ProductUploadExcelView(APIView):
-#     """
-#     POST API to upload an Excel file with two sheets:
-#     1. 'Products' sheet - Product details
-#     2. 'Attribute_values' sheet - Original attribute values
-    
-#     Creates/updates both products and their attribute values in a single transaction.
-#     """
-#     parser_classes = (MultiPartParser, FormParser)
 
-#     def post(self, request, *args, **kwargs):
-#         file_obj = request.FILES.get('file')
-#         if not file_obj:
-#             return Response({'error': 'No file provided'}, status=status.HTTP_400_BAD_REQUEST)
 
-#         try:
-#             # Read all sheets from Excel file
-#             excel_file = pd.ExcelFile(file_obj)
-            
-#             # Check if required sheets exist
-#             if 'Products' not in excel_file.sheet_names:
-#                 return Response({
-#                     'error': "Missing 'Products' sheet",
-#                     'available_sheets': excel_file.sheet_names
-#                 }, status=status.HTTP_400_BAD_REQUEST)
-            
-#             # Read Products sheet
-#             df_products = pd.read_excel(excel_file, sheet_name='Products')
-#             df_products.columns = [c.strip().lower().replace(' ', '_') for c in df_products.columns]
-
-#             # Check required columns for Products
-#             expected_product_cols = {
-#                 'item_id',
-#                 'product_name',
-#                 'product_long_description',
-#                 'product_short_description',
-#                 'product_type',
-#                 'image_path'
-#             }
 
-#             if not expected_product_cols.issubset(df_products.columns):
-#                 return Response({
-#                     'error': 'Missing required columns in Products sheet',
-#                     'required_columns': list(expected_product_cols),
-#                     'found_columns': list(df_products.columns)
-#                 }, status=status.HTTP_400_BAD_REQUEST)
 
-#             # Read Attribute_values sheet if it exists
-#             df_attributes = None
-#             has_attributes_sheet = 'Attribute_values' in excel_file.sheet_names
-            
-#             if has_attributes_sheet:
-#                 df_attributes = pd.read_excel(excel_file, sheet_name='Attribute_values')
-#                 df_attributes.columns = [c.strip().lower().replace(' ', '_') for c in df_attributes.columns]
-                
-#                 # Check required columns for Attribute_values
-#                 expected_attr_cols = {'item_id', 'attribute_name', 'original_value'}
-#                 if not expected_attr_cols.issubset(df_attributes.columns):
-#                     return Response({
-#                         'error': 'Missing required columns in Attribute_values sheet',
-#                         'required_columns': list(expected_attr_cols),
-#                         'found_columns': list(df_attributes.columns)
-#                     }, status=status.HTTP_400_BAD_REQUEST)
-
-#             # Initialize counters
-#             products_created = 0
-#             products_updated = 0
-#             attributes_created = 0
-#             attributes_updated = 0
-#             products_failed = 0
-#             attributes_failed = 0
-#             errors = []
-
-#             # Use transaction to ensure atomicity
-#             with transaction.atomic():
-#                 # Process Products sheet
-#                 for idx, row in df_products.iterrows():
-#                     item_id = str(row.get('item_id', '')).strip()
-#                     if not item_id:
-#                         products_failed += 1
-#                         errors.append(f"Products Row {idx + 2}: Missing item_id")
-#                         continue
-
-#                     try:
-#                         defaults = {
-#                             'product_name': str(row.get('product_name', '')),
-#                             'product_long_description': str(row.get('product_long_description', '')),
-#                             'product_short_description': str(row.get('product_short_description', '')),
-#                             'product_type': str(row.get('product_type', '')),
-#                             'image_path': str(row.get('image_path', '')),
-#                         }
-
-#                         obj, created = Product.objects.update_or_create(
-#                             item_id=item_id,
-#                             defaults=defaults
-#                         )
 
-#                         if created:
-#                             products_created += 1
-#                         else:
-#                             products_updated += 1
-#                     except Exception as e:
-#                         products_failed += 1
-#                         errors.append(f"Products Row {idx + 2} (item_id: {item_id}): {str(e)}")
-
-#                 # Process Attribute_values sheet if it exists
-#                 if has_attributes_sheet and df_attributes is not None:
-#                     # Group by item_id to optimize lookups
-#                     item_ids_in_attrs = df_attributes['item_id'].unique()
-                    
-#                     # Fetch all products at once
-#                     existing_products = {
-#                         p.item_id: p 
-#                         for p in Product.objects.filter(item_id__in=item_ids_in_attrs)
-#                     }
-
-#                     for idx, row in df_attributes.iterrows():
-#                         item_id = str(row.get('item_id', '')).strip()
-#                         attribute_name = str(row.get('attribute_name', '')).strip()
-#                         original_value = str(row.get('original_value', '')).strip()
-
-#                         if not item_id or not attribute_name:
-#                             attributes_failed += 1
-#                             errors.append(
-#                                 f"Attribute_values Row {idx + 2}: Missing item_id or attribute_name"
-#                             )
-#                             continue
-
-#                         # Check if product exists
-#                         product = existing_products.get(item_id)
-#                         if not product:
-#                             attributes_failed += 1
-#                             errors.append(
-#                                 f"Attribute_values Row {idx + 2}: Product with item_id '{item_id}' not found. "
-#                                 "Make sure it exists in Products sheet."
-#                             )
-#                             continue
-
-#                         try:
-#                             attr_obj, created = ProductAttributeValue.objects.update_or_create(
-#                                 product=product,
-#                                 attribute_name=attribute_name,
-#                                 defaults={'original_value': original_value}
-#                             )
-
-#                             if created:
-#                                 attributes_created += 1
-#                             else:
-#                                 attributes_updated += 1
-#                         except Exception as e:
-#                             attributes_failed += 1
-#                             errors.append(
-#                                 f"Attribute_values Row {idx + 2} "
-#                                 f"(item_id: {item_id}, attribute: {attribute_name}): {str(e)}"
-#                             )
-
-#             # Prepare response
-#             response_data = {
-#                 'message': 'Upload completed successfully',
-#                 'products': {
-#                     'created': products_created,
-#                     'updated': products_updated,
-#                     'failed': products_failed,
-#                     'total_processed': products_created + products_updated + products_failed
-#                 }
-#             }
-
-#             if has_attributes_sheet:
-#                 response_data['attribute_values'] = {
-#                     'created': attributes_created,
-#                     'updated': attributes_updated,
-#                     'failed': attributes_failed,
-#                     'total_processed': attributes_created + attributes_updated + attributes_failed
-#                 }
-#             else:
-#                 response_data['attribute_values'] = {
-#                     'message': 'Attribute_values sheet not found in Excel file'
-#                 }
 
-#             if errors:
-#                 response_data['errors'] = errors[:50]  # Limit to first 50 errors
-#                 if len(errors) > 50:
-#                     response_data['errors'].append(f"... and {len(errors) - 50} more errors")
-
-#             # Determine status code
-#             if products_failed > 0 or attributes_failed > 0:
-#                 status_code = status.HTTP_207_MULTI_STATUS
-#             else:
-#                 status_code = status.HTTP_201_CREATED
-
-#             return Response(response_data, status=status_code)
-
-#         except pd.errors.EmptyDataError:
-#             return Response({
-#                 'error': 'The uploaded Excel file is empty or invalid'
-#             }, status=status.HTTP_400_BAD_REQUEST)
-#         except Exception as e:
-#             return Response({
-#                 'error': f'An error occurred while processing the file: {str(e)}'
-#             }, status=status.HTTP_500_INTERNAL_SERVER_ERROR)
-
-
-
-
-
-
-
-# import logging
-# import json
-# from rest_framework.views import APIView
-# from rest_framework.response import Response
-# from rest_framework import status
-# from rest_framework.parsers import MultiPartParser, FormParser
-# from django.db import transaction
-# from django.db.models import Prefetch, F
-# import pandas as pd
-# # Import ALL your models
-# from .models import Product, ProductAttributeValue, ProductType, ProductAttribute, AttributePossibleValue
-# from .services import ProductAttributeService
-# from .ocr_service import OCRService
-# from .visual_processing_service import VisualProcessingService
-# from openpyxl import Workbook
-# from openpyxl.styles import Font, PatternFill, Alignment
-# from django.conf import settings
-# import os
-# import threading
-# from datetime import datetime
-
-# # --- Logging Setup ---
-# # Define log and status file paths in MEDIA_ROOT
-# LOG_FILE_PATH = os.path.join(settings.MEDIA_ROOT, 'excel_generation.log')
-# STATUS_FILE_PATH = os.path.join(settings.MEDIA_ROOT, 'excel_generation_status.json')
-
-# # Ensure the MEDIA_ROOT exists for files to be saved
-# if not os.path.exists(settings.MEDIA_ROOT):
-#     os.makedirs(settings.MEDIA_ROOT)
-
-# # Configure basic logging
-# logging.basicConfig(
-#     filename=LOG_FILE_PATH,
-#     level=logging.INFO,
-#     format='%(asctime)s - %(levelname)s - %(message)s'
-# )
-# logger = logging.getLogger(__name__)
-
-# # -------------------------------------------------------------------------------------------------
-
-# def generate_product_excel_background():
-#     """
-#     Function to perform batch attribute extraction for all products and generate an Excel file.
-#     Runs in a background thread to avoid blocking the API response.
-#     Logs success/failure and saves a status file for external monitoring.
-#     """
-#     logger.info(f"[{datetime.now().isoformat()}] Starting background product Excel generation and attribute extraction.")
-    
-#     successful = 0
-#     failed = 0
-#     results = [] # To store detailed extraction results for Excel sheet 2
-    
-#     # Function to write status file (SUCCESS/FAILED)
-#     def write_status(status_type, error_msg=None):
-#         status_data = {
-#             "status": status_type,
-#             "timestamp": datetime.now().isoformat(),
-#             "products_processed": successful + failed,
-#             "products_successful": successful,
-#             "products_failed": failed,
-#             "excel_path": os.path.join(settings.MEDIA_URL, 'generated_products.xlsx') if status_type == "SUCCESS" else None,
-#             "log_path": os.path.join(settings.MEDIA_URL, 'excel_generation.log'),
-#             "error_message": error_msg
-#         }
-#         try:
-#             with open(STATUS_FILE_PATH, 'w') as f:
-#                 json.dump(status_data, f, indent=4)
-#         except Exception as e:
-#             logger.exception(f"CRITICAL ERROR: Failed to write status file at {STATUS_FILE_PATH}: {e}")
-
-#     try:
-#         # 1. PREFETCH all necessary related data to minimize database queries
-        
-#         # Prefetch possible values for mandatory attributes
-#         possible_values_prefetch = Prefetch(
-#             'attributes',
-#             queryset=ProductAttribute.objects.filter(is_mandatory=True).prefetch_related('possible_values')
-#         )
-        
-#         # Fetch all ProductTypes with their mandatory attributes and possible values
-#         all_product_types = ProductType.objects.prefetch_related(possible_values_prefetch)
-#         product_type_map = {
-#             pt.name: pt for pt in all_product_types
-#         }
-
-#         # Prepare product_list for batch extraction
-#         all_products = Product.objects.all()
-#         product_list = []
-        
-#         for p in all_products:
-#             # mandatory_attrs will be the dictionary required by the service
-#             mandatory_attrs_dict = {}
-#             product_type_name = p.product_type.strip() if p.product_type else None
-            
-#             if product_type_name and product_type_name in product_type_map:
-#                 pt = product_type_map[product_type_name]
-                
-#                 # Build the mandatory_attrs dictionary: { "Attribute Name": ["Value 1", "Value 2"], ... }
-#                 for attr in pt.attributes.all(): # .all() here works because we used Prefetch for 'attributes'
-#                     # attr.possible_values.all() works because we used prefetch_related('possible_values')
-#                     mandatory_attrs_dict[attr.name] = [
-#                         pv.value for pv in attr.possible_values.all()
-#                     ]
-            
-#             product_list.append({
-#                 "item_id": p.item_id,
-#                 "product_type_name": product_type_name,
-#                 "mandatory_attrs": mandatory_attrs_dict # <-- FIX: Pass the dictionary here
-#             })
-
-#         # Batch settings (using defaults)
-#         model = "llama-3.1-8b-instant"
-#         extract_additional = True
-#         process_image = False
-#         multiple = []
-#         threshold_abs = 0.65
-#         margin = 0.15
-#         use_dynamic_thresholds = True
-#         use_adaptive_margin = True
-#         use_semantic_clustering = True
-
-#         # Batch extraction logic
-#         item_ids = [p['item_id'] for p in product_list]
-#         products_queryset = Product.objects.filter(item_id__in=item_ids)
-#         product_map = {product.item_id: product for product in products_queryset}
-#         found_ids = set(product_map.keys())
-
-#         for product_entry in product_list:
-#             item_id = product_entry['item_id']
-#             # FIX: mandatory_attrs is now correctly a dictionary (or an empty dictionary)
-#             mandatory_attrs = product_entry['mandatory_attrs'] 
-
-#             if item_id not in found_ids:
-#                 failed += 1
-#                 results.append({
-#                     "product_id": item_id,
-#                     "error": "Product not found in database"
-#                 })
-#                 logger.warning(f"Product {item_id} not found in database. Skipping extraction.")
-#                 continue
-
-#             product = product_map[item_id]
-
-#             try:
-#                 title = product.product_name
-#                 short_desc = product.product_short_description
-#                 long_desc = product.product_long_description
-#                 image_url = product.image_path
-
-#                 ocr_results = None
-#                 ocr_text = None
-#                 visual_results = None
-
-#                 if process_image and image_url:
-#                     logger.info(f"Processing image for product {item_id}...")
-#                     # OCR Processing
-#                     ocr_service = OCRService()
-#                     ocr_results = ocr_service.process_image(image_url)
-
-#                     if ocr_results and ocr_results.get("detected_text"):
-#                         # NOTE: Assuming ProductAttributeService.extract_attributes_from_ocr exists
-#                         ocr_attrs = ProductAttributeService.extract_attributes_from_ocr(
-#                              ocr_results, model
-#                         )
-#                         ocr_results["extracted_attributes"] = ocr_attrs
-#                         ocr_text = "\n".join([
-#                              f"{item['text']} (confidence: {item['confidence']:.2f})"
-#                              for item in ocr_results["detected_text"]
-#                         ])
-
-#                     # Visual Processing
-#                     visual_service = VisualProcessingService()
-#                     product_type_hint = product.product_type if product.product_type else None
-#                     visual_results = visual_service.process_image(image_url, product_type_hint)
-
-#                     if visual_results and visual_results.get('visual_attributes'):
-#                         visual_results['visual_attributes'] = ProductAttributeService.format_visual_attributes(
-#                             visual_results['visual_attributes']
-#                         )
-#                     logger.info(f"Image processing done for product {item_id}.")
-
-
-#                 # Combine product text with source tracking
-#                 product_text, source_map = ProductAttributeService.combine_product_text(
-#                     title=title,
-#                     short_desc=short_desc,
-#                     long_desc=long_desc,
-#                     ocr_text=ocr_text
-#                 )
-
-#                 # Attribute Extraction with source tracking
-#                 extracted = ProductAttributeService.extract_attributes(
-#                     product_text=product_text,
-#                     mandatory_attrs=mandatory_attrs, # <-- This is now the dictionary with possible values
-#                     source_map=source_map,
-#                     model=model,
-#                     extract_additional=extract_additional,
-#                     multiple=multiple,
-#                     threshold_abs=threshold_abs,
-#                     margin=margin,
-#                     use_dynamic_thresholds=use_dynamic_thresholds,
-#                     use_adaptive_margin=use_adaptive_margin,
-#                     use_semantic_clustering=use_semantic_clustering
-#                 )
-
-#                 result = {
-#                     "product_id": item_id,
-#                     "mandatory": extracted.get("mandatory", {}),
-#                     "additional": extracted.get("additional", {}),
-#                 }
-
-#                 if ocr_results:
-#                     result["ocr_results"] = ocr_results
-
-#                 if visual_results:
-#                     result["visual_results"] = visual_results
-
-#                 results.append(result)
-#                 successful += 1
-#                 logger.info(f"Attribute extraction successful for product {item_id}.")
-
-#             except Exception as e:
-#                 failed += 1
-#                 results.append({
-#                     "product_id": item_id,
-#                     "error": str(e)
-#                 })
-#                 # Original Error: AttributeError: 'list' object has no attribute 'items'
-#                 # This should now be fixed, but we keep the robust exception handling.
-#                 logger.exception(f"Error during attribute extraction for product {item_id}.")
-
-#         logger.info(f"Batch extraction phase complete. Successful: {successful}, Failed: {failed}")
-        
-#         # --------------------------------------------------------------------------------
-#         # Generate and save the Excel file (Unchanged)
-#         # --------------------------------------------------------------------------------
-#         wb = Workbook()
-
-#         # Sheet 1: Products (from DB)
-#         ws_products = wb.active
-#         ws_products.title = "Products"
-#         products_headers = ['ITEM ID', 'PRODUCT NAME', 'PRODUCT TYPE', 'Product Short Description', 'Product Long Description', 'image_path']
-#         header_fill = PatternFill(start_color="366092", end_color="366092", fill_type="solid")
-#         header_font = Font(bold=True, color="FFFFFF")
-
-#         for col_num, header in enumerate(products_headers, 1):
-#             cell = ws_products.cell(row=1, column=col_num)
-#             cell.value = header
-#             cell.fill = header_fill
-#             cell.font = header_font
-#             cell.alignment = Alignment(horizontal="center", vertical="center")
-
-#         all_products_db = Product.objects.all()
-#         for row_num, product in enumerate(all_products_db, 2):
-#             ws_products.cell(row=row_num, column=1, value=product.item_id)
-#             ws_products.cell(row=row_num, column=2, value=product.product_name)
-#             ws_products.cell(row=row_num, column=3, value=product.product_type)
-#             ws_products.cell(row=row_num, column=4, value=product.product_short_description)
-#             ws_products.cell(row=row_num, column=5, value=product.product_long_description)
-#             ws_products.cell(row=row_num, column=6, value=product.image_path)
-
-#         # Adjust column widths
-#         for col_dim, width in zip(['A', 'B', 'C', 'D', 'E', 'F'], [15, 25, 15, 35, 50, 45]):
-#              ws_products.column_dimensions[col_dim].width = width
-
-#         # Sheet 2: Attribute_values (item_id, attribute_name, original_value, generated_value)
-#         ws_attributes = wb.create_sheet("Attribute_values")
-#         attributes_headers = ['item_id', 'attribute_name', 'original_value', 'generated_value']
-#         for col_num, header in enumerate(attributes_headers, 1):
-#             cell = ws_attributes.cell(row=1, column=col_num)
-#             cell.value = header
-#             cell.fill = header_fill
-#             cell.font = header_font
-#             cell.alignment = Alignment(horizontal="center", vertical="center")
-
-#         # Fetch all original attributes
-#         row_num = 2
-#         all_original_attrs = ProductAttributeValue.objects.all()
-#         # Create a lookup for original attributes by item_id and attribute_name
-#         original_attrs_lookup = {
-#             (attr.product.item_id, attr.attribute_name): attr.original_value
-#             for attr in all_original_attrs
-#         }
-
-#         # Add attributes (original and generated)
-#         processed_original_keys = set()
-#         for res in results:
-#             item_id = res["product_id"]
-
-#             if "error" in res:
-#                 # Add existing original attributes for failed products to the sheet
-#                 for (orig_item_id, orig_attr_name), orig_value in original_attrs_lookup.items():
-#                     if orig_item_id == item_id:
-#                         ws_attributes.cell(row=row_num, column=1, value=orig_item_id)
-#                         ws_attributes.cell(row=row_num, column=2, value=orig_attr_name)
-#                         ws_attributes.cell(row=row_num, column=3, value=orig_value)
-#                         ws_attributes.cell(row=row_num, column=4, value=f"Extraction Failed: {res['error']}")
-#                         processed_original_keys.add((orig_item_id, orig_attr_name))
-#                         row_num += 1
-#                 continue
-
-#             # Combine all generated attributes (mandatory, additional, OCR, visual)
-#             generated_attrs = {}
-#             for cat in ["mandatory", "additional"]:
-#                 attrs = res.get(cat, {})
-#                 for attr_name, values in attrs.items():
-#                     for val in values:
-#                         key = (item_id, attr_name)
-#                         if key not in generated_attrs:
-#                             generated_attrs[key] = []
-#                         generated_attrs[key].append(f"{val['value']} (source: {val['source']})")
-
-#             # OCR extracted
-#             ocr = res.get("ocr_results")
-#             if ocr and "extracted_attributes" in ocr and isinstance(ocr["extracted_attributes"], dict):
-#                 for attr_name, values in ocr["extracted_attributes"].items():
-#                     for val in values:
-#                         key = (item_id, attr_name)
-#                         if key not in generated_attrs:
-#                             generated_attrs[key] = []
-#                         generated_attrs[key].append(f"{val['value']} (source: {val['source']})")
-
-#             # Visual extracted
-#             visual = res.get("visual_results")
-#             if visual and "visual_attributes" in visual:
-#                 vis_attrs = visual["visual_attributes"]
-#                 if isinstance(vis_attrs, dict):
-#                     # Handle dict format where value might be a list of dicts or a single value
-#                     for attr_name, values in vis_attrs.items():
-#                         if not isinstance(values, list):
-#                             values = [{"value": values, "source": "visual"}]
-#                         for val in values:
-#                             key = (item_id, attr_name)
-#                             if key not in generated_attrs:
-#                                 generated_attrs[key] = []
-#                             generated_attrs[key].append(f"{val['value']} (source: {val.get('source', 'visual')})")
-#                 elif isinstance(vis_attrs, list):
-#                     # Handle list of dicts format
-#                     for item in vis_attrs:
-#                         attr_name = item.get("attribute_name") or item.get("name")
-#                         if not attr_name: continue
-#                         value = item.get("value", "")
-#                         source = item.get("source", "visual")
-#                         key = (item_id, attr_name)
-#                         if key not in generated_attrs:
-#                             generated_attrs[key] = []
-#                         generated_attrs[key].append(f"{value} (source: {source})")
-
-
-#             # Write attributes to Excel
-#             for (attr_item_id, attr_name), gen_values in generated_attrs.items():
-#                 # Get original value from lookup (if it exists)
-#                 original_value = original_attrs_lookup.get((attr_item_id, attr_name), "")
-#                 # Combine multiple generated values into a single string
-#                 generated_value = "; ".join(gen_values) if gen_values else ""
-#                 # Write row
-#                 ws_attributes.cell(row=row_num, column=1, value=attr_item_id)
-#                 ws_attributes.cell(row=row_num, column=2, value=attr_name)
-#                 ws_attributes.cell(row=row_num, column=3, value=original_value)
-#                 ws_attributes.cell(row=row_num, column=4, value=generated_value)
-#                 processed_original_keys.add((attr_item_id, attr_name))
-#                 row_num += 1
-
-#             # Add original attributes that have no generated values for this item_id
-#             for (orig_item_id, orig_attr_name), orig_value in original_attrs_lookup.items():
-#                 if orig_item_id == item_id and (orig_item_id, orig_attr_name) not in processed_original_keys:
-#                     ws_attributes.cell(row=row_num, column=1, value=orig_item_id)
-#                     ws_attributes.cell(row=row_num, column=2, value=orig_attr_name)
-#                     ws_attributes.cell(row=row_num, column=3, value=orig_value)
-#                     ws_attributes.cell(row=row_num, column=4, value="") # No generated value
-#                     processed_original_keys.add((orig_item_id, orig_attr_name))
-#                     row_num += 1
-        
-#         # Add original attributes for products not included in the 'results' (e.g. if they didn't exist in product_list)
-#         # We assume all products are in product_list, so this step might be redundant, but safe for completeness.
-#         for (orig_item_id, orig_attr_name), orig_value in original_attrs_lookup.items():
-#             if (orig_item_id, orig_attr_name) not in processed_original_keys:
-#                 ws_attributes.cell(row=row_num, column=1, value=orig_item_id)
-#                 ws_attributes.cell(row=row_num, column=2, value=orig_attr_name)
-#                 ws_attributes.cell(row=row_num, column=3, value=orig_value)
-#                 ws_attributes.cell(row=row_num, column=4, value="Original value only (Product not processed in batch)")
-#                 row_num += 1
-
-
-#         # Adjust column widths for attributes
-#         for col_dim, width in zip(['A', 'B', 'C', 'D'], [15, 35, 50, 50]):
-#              ws_attributes.column_dimensions[col_dim].width = width
-
-#         # Save the generated Excel (replace existing)
-#         save_path = os.path.join(settings.MEDIA_ROOT, 'generated_products.xlsx')
-#         wb.save(save_path)
-#         logger.info(f"Excel file successfully saved to {save_path}")
-        
-#         # Write SUCCESS status
-#         write_status("SUCCESS")
-#         logger.info("Background task finished successfully.")
-
-
-#     except Exception as e:
-#         # Log the critical error and write FAILED status
-#         logger.exception("CRITICAL ERROR during background Excel generation process.")
-#         write_status("FAILED", error_msg=str(e))
-
-
-# # -------------------------------------------------------------------------------------------------
-
-# class ProductUploadExcelView(APIView):
-#     """
-#     POST API to upload an Excel file. (Unchanged)
-#     """
-#     parser_classes = (MultiPartParser, FormParser)
-
-#     def post(self, request, *args, **kwargs):
-#         file_obj = request.FILES.get('file')
-#         if not file_obj:
-#             return Response({'error': 'No file provided'}, status=status.HTTP_400_BAD_REQUEST)
-
-#         try:
-#             # Read all sheets from Excel file
-#             excel_file = pd.ExcelFile(file_obj)
-            
-#             # Check if required sheets exist
-#             if 'Products' not in excel_file.sheet_names:
-#                  logger.error(f"Upload failed: Missing 'Products' sheet in file.")
-#                  return Response({
-#                      'error': "Missing 'Products' sheet",
-#                      'available_sheets': excel_file.sheet_names
-#                  }, status=status.HTTP_400_BAD_REQUEST)
-            
-#             # Read Products sheet
-#             df_products = pd.read_excel(excel_file, sheet_name='Products')
-#             df_products.columns = [c.strip().lower().replace(' ', '_') for c in df_products.columns]
-
-#             # Check required columns for Products
-#             expected_product_cols = {
-#                  'item_id', 'product_name', 'product_long_description',
-#                  'product_short_description', 'product_type', 'image_path'
-#             }
-
-#             if not expected_product_cols.issubset(df_products.columns):
-#                  logger.error(f"Upload failed: Missing required columns in Products sheet.")
-#                  return Response({
-#                      'error': 'Missing required columns in Products sheet',
-#                      'required_columns': list(expected_product_cols),
-#                      'found_columns': list(df_products.columns)
-#                  }, status=status.HTTP_400_BAD_REQUEST)
-
-#             # Read Attribute_values sheet if it exists
-#             df_attributes = None
-#             has_attributes_sheet = 'Attribute_values' in excel_file.sheet_names
-            
-#             if has_attributes_sheet:
-#                  df_attributes = pd.read_excel(excel_file, sheet_name='Attribute_values')
-#                  df_attributes.columns = [c.strip().lower().replace(' ', '_') for c in df_attributes.columns]
-                 
-#                  # Check required columns for Attribute_values
-#                  expected_attr_cols = {'item_id', 'attribute_name', 'original_value'}
-#                  if not expected_attr_cols.issubset(df_attributes.columns):
-#                      logger.error(f"Upload failed: Missing required columns in Attribute_values sheet.")
-#                      return Response({
-#                           'error': 'Missing required columns in Attribute_values sheet',
-#                           'required_columns': list(expected_attr_cols),
-#                           'found_columns': list(df_attributes.columns)
-#                      }, status=status.HTTP_400_BAD_REQUEST)
-
-#             # Initialize counters
-#             products_created = 0
-#             products_updated = 0
-#             attributes_created = 0
-#             attributes_updated = 0
-#             products_failed = 0
-#             attributes_failed = 0
-#             errors = []
-
-#             # Use transaction to ensure atomicity
-#             with transaction.atomic():
-#                  # Process Products sheet
-#                  for idx, row in df_products.iterrows():
-#                      item_id = str(row.get('item_id', '')).strip()
-#                      product_type = str(row.get('product_type', '')).strip()
-
-#                      if not item_id:
-#                          products_failed += 1
-#                          errors.append(f"Products Row {idx + 2}: Missing item_id")
-#                          continue
-
-#                      try:
-#                          # Auto-create ProductType if provided and doesn't exist
-#                          if product_type:
-#                              ProductType.objects.get_or_create(name=product_type)
-
-#                          defaults = {
-#                              'product_name': str(row.get('product_name', '')),
-#                              'product_long_description': str(row.get('product_long_description', '')),
-#                              'product_short_description': str(row.get('product_short_description', '')),
-#                              'product_type': product_type,
-#                              'image_path': str(row.get('image_path', '')),
-#                          }
-
-#                          obj, created = Product.objects.update_or_create(
-#                              item_id=item_id,
-#                              defaults=defaults
-#                          )
-
-#                          if created:
-#                              products_created += 1
-#                          else:
-#                              products_updated += 1
-#                      except Exception as e:
-#                          products_failed += 1
-#                          errors.append(f"Products Row {idx + 2} (item_id: {item_id}): {str(e)}")
-#                          logger.error(f"Error processing product {item_id} in Products sheet: {e}")
-
-
-#                  # Process Attribute_values sheet if it exists
-#                  if has_attributes_sheet and df_attributes is not None:
-#                       # Group by item_id to optimize lookups
-#                       item_ids_in_attrs = df_attributes['item_id'].astype(str).unique()
-                      
-#                       # Fetch all products at once
-#                       existing_products = {
-#                           p.item_id: p 
-#                           for p in Product.objects.filter(item_id__in=item_ids_in_attrs)
-#                       }
-
-#                       for idx, row in df_attributes.iterrows():
-#                           item_id = str(row.get('item_id', '')).strip()
-#                           attribute_name = str(row.get('attribute_name', '')).strip()
-#                           original_value = str(row.get('original_value', '')).strip()
-
-#                           if not item_id or not attribute_name:
-#                               attributes_failed += 1
-#                               errors.append(
-#                                   f"Attribute_values Row {idx + 2}: Missing item_id or attribute_name"
-#                               )
-#                               continue
-
-#                           # Check if product exists
-#                           product = existing_products.get(item_id)
-#                           if not product:
-#                               attributes_failed += 1
-#                               errors.append(
-#                                   f"Attribute_values Row {idx + 2}: Product with item_id '{item_id}' not found. "
-#                                   "Make sure it exists in Products sheet."
-#                               )
-#                               continue
-
-#                           try:
-#                               attr_obj, created = ProductAttributeValue.objects.update_or_create(
-#                                   product=product,
-#                                   attribute_name=attribute_name,
-#                                   defaults={'original_value': original_value}
-#                               )
-
-#                               if created:
-#                                   attributes_created += 1
-#                               else:
-#                                   attributes_updated += 1
-#                           except Exception as e:
-#                               attributes_failed += 1
-#                               errors.append(
-#                                   f"Attribute_values Row {idx + 2} "
-#                                   f"(item_id: {item_id}, attribute: {attribute_name}): {str(e)}"
-#                               )
-#                               logger.error(f"Error processing attribute {attribute_name} for product {item_id}: {e}")
-
-#             # Prepare response data
-#             response_data = {
-#                 'message': 'Upload completed',
-#                 'products': {
-#                     'created': products_created,
-#                     'updated': products_updated,
-#                     'failed': products_failed,
-#                     'total_processed': products_created + products_updated + products_failed
-#                 },
-#                 'attribute_values': {
-#                      'created': attributes_created,
-#                      'updated': attributes_updated,
-#                      'failed': attributes_failed,
-#                      'total_processed': attributes_created + attributes_updated + attributes_failed
-#                 } if has_attributes_sheet else {'message': 'Attribute_values sheet not found in Excel file'},
-#                 'generated_excel_status': 'Excel generation started in the background.'
-#             }
-
-#             if errors:
-#                 response_data['errors'] = errors[:50]
-#                 if len(errors) > 50:
-#                     response_data['errors'].append(f"... and {len(errors) - 50} more errors")
-
-#             # Determine status code for upload
-#             upload_status = status.HTTP_201_CREATED if products_failed == 0 and attributes_failed == 0 else status.HTTP_207_MULTI_STATUS
-
-#             # Start background thread for Excel generation if upload was successful
-#             if products_failed == 0 and attributes_failed == 0:
-#                 logger.info("API call successful. Triggering background Excel generation thread.")
-#                 threading.Thread(target=generate_product_excel_background, daemon=True).start()
-                
-#                 # Update response to provide monitoring paths
-#                 response_data['generated_excel_status'] = 'Background Excel generation triggered successfully.'
-#                 response_data['monitoring'] = {
-#                      'excel_file': os.path.join(settings.MEDIA_URL, 'generated_products.xlsx'),
-#                      'status_file': os.path.join(settings.MEDIA_URL, 'excel_generation_status.json'),
-#                      'log_file': os.path.join(settings.MEDIA_URL, 'excel_generation.log'),
-#                      'note': 'These files will be available once the background process completes.'
-#                 }
-#             else:
-#                  logger.warning(f"API call finished with errors ({products_failed} products, {attributes_failed} attributes). Not triggering background excel generation.")
-#                  response_data['generated_excel_status'] = 'Background Excel generation was NOT triggered due to upload errors. Fix upload errors and re-upload.'
-
-
-#             return Response(response_data, status=upload_status)
-
-#         except pd.errors.EmptyDataError:
-#             logger.error('The uploaded Excel file is empty or invalid.')
-#             return Response({
-#                 'error': 'The uploaded Excel file is empty or invalid'
-#             }, status=status.HTTP_400_BAD_REQUEST)
-#         except Exception as e:
-#             logger.exception(f'An unexpected error occurred while processing the file.')
-#             return Response({
-#                 'error': f'An unexpected error occurred while processing the file: {str(e)}'
-#             }, status=status.HTTP_500_INTERNAL_SERVER_ERROR)
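Note on the views.py removal above: the deleted block flattened `visual_results["visual_attributes"]` while tolerating both a dict keyed by attribute name and a list of dicts, which is what originally triggered the `AttributeError: 'list' object has no attribute 'items'` mentioned in its comments. A minimal sketch of that normalization pattern, kept separate from the diff (the helper name and field fallbacks are illustrative, derived only from the removed code):

```python
# Sketch only: flatten visual_attributes that may arrive either as a dict
# keyed by attribute name or as a list of dicts, mirroring the removed block.
from typing import Dict, List, Tuple, Union


def iter_visual_attrs(vis_attrs: Union[Dict, List, None]) -> List[Tuple[str, str, str]]:
    """Return (attribute_name, value, source) triples from either shape."""
    triples: List[Tuple[str, str, str]] = []
    if isinstance(vis_attrs, dict):
        for attr_name, values in vis_attrs.items():
            # Wrap scalar values so both shapes look the same downstream
            if not isinstance(values, list):
                values = [{"value": values, "source": "visual"}]
            for val in values:
                triples.append(
                    (attr_name, str(val.get("value", "")), val.get("source", "visual"))
                )
    elif isinstance(vis_attrs, list):
        for item in vis_attrs:
            attr_name = item.get("attribute_name") or item.get("name")
            if not attr_name:
                continue
            triples.append(
                (attr_name, str(item.get("value", "")), item.get("source", "visual"))
            )
    return triples
```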
 
 
 

+ 94 - 29
attr_extraction/visual_processing_service.py

@@ -380,6 +380,11 @@ from sklearn.cluster import KMeans
 
 logger = logging.getLogger(__name__)
 
+import os
+os.environ['TOKENIZERS_PARALLELISM'] = 'false'  # Disable tokenizer warnings
+import warnings
+warnings.filterwarnings('ignore')  # Suppress all warnings
+
 
 class VisualProcessingService:
     """Service for extracting visual attributes from product images using CLIP with smart subcategory detection."""
@@ -585,6 +590,57 @@ class VisualProcessingService:
         else:
             return 'gray'
     
+    # def classify_with_clip(
+    #     self,
+    #     image: Image.Image,
+    #     candidates: List[str],
+    #     attribute_name: str,
+    #     confidence_threshold: float = 0.15
+    # ) -> Dict:
+    #     """Use CLIP to classify image against candidate labels."""
+    #     try:
+    #         model, processor = self._get_clip_model()
+    #         device = self._get_device()
+            
+    #         # Prepare inputs
+    #         inputs = processor(
+    #             text=candidates,
+    #             images=image,
+    #             return_tensors="pt",
+    #             padding=True
+    #         )
+            
+    #         # Move to device
+    #         inputs = {k: v.to(device) for k, v in inputs.items()}
+            
+    #         # Get predictions
+    #         with torch.no_grad():
+    #             outputs = model(**inputs)
+    #             logits_per_image = outputs.logits_per_image
+    #             probs = logits_per_image.softmax(dim=1).cpu()
+            
+    #         # Get top predictions
+    #         top_k = min(3, len(candidates))
+    #         top_probs, top_indices = torch.topk(probs[0], k=top_k)
+            
+    #         results = []
+    #         for prob, idx in zip(top_probs, top_indices):
+    #             if prob.item() > confidence_threshold:
+    #                 results.append({
+    #                     "value": candidates[idx.item()],
+    #                     "confidence": round(float(prob.item()), 3)
+    #                 })
+            
+    #         return {
+    #             "attribute": attribute_name,
+    #             "predictions": results
+    #         }
+            
+    #     except Exception as e:
+    #         logger.error(f"Error in CLIP classification for {attribute_name}: {str(e)}")
+    #         return {"attribute": attribute_name, "predictions": []}
+    
+
     def classify_with_clip(
         self,
         image: Image.Image,
@@ -597,44 +653,54 @@ class VisualProcessingService:
             model, processor = self._get_clip_model()
             device = self._get_device()
             
-            # Prepare inputs
-            inputs = processor(
-                text=candidates,
-                images=image,
-                return_tensors="pt",
-                padding=True
-            )
-            
-            # Move to device
-            inputs = {k: v.to(device) for k, v in inputs.items()}
+            # ⚡ OPTIMIZATION: Process in smaller batches to avoid memory issues
+            batch_size = 16  # Process 16 candidates at a time
+            all_results = []
             
-            # Get predictions
-            with torch.no_grad():
-                outputs = model(**inputs)
-                logits_per_image = outputs.logits_per_image
-                probs = logits_per_image.softmax(dim=1).cpu()
-            
-            # Get top predictions
-            top_k = min(3, len(candidates))
-            top_probs, top_indices = torch.topk(probs[0], k=top_k)
+            for i in range(0, len(candidates), batch_size):
+                batch_candidates = candidates[i:i + batch_size]
+                
+                # Prepare inputs WITHOUT progress bars
+                inputs = processor(
+                    text=batch_candidates,
+                    images=image,
+                    return_tensors="pt",
+                    padding=True
+                )
+                
+                # Move to device
+                inputs = {k: v.to(device) for k, v in inputs.items()}
+                
+                # Get predictions
+                with torch.no_grad():
+                    outputs = model(**inputs)
+                    logits_per_image = outputs.logits_per_image
+                    probs = logits_per_image.softmax(dim=1).cpu()
+                
+                # Collect results from this batch
+                for j, prob in enumerate(probs[0]):
+                    if prob.item() > confidence_threshold:
+                        all_results.append({
+                            "value": batch_candidates[j],
+                            "confidence": round(float(prob.item()), 3)
+                        })
             
-            results = []
-            for prob, idx in zip(top_probs, top_indices):
-                if prob.item() > confidence_threshold:
-                    results.append({
-                        "value": candidates[idx.item()],
-                        "confidence": round(float(prob.item()), 3)
-                    })
+            # Sort by confidence and return top 3
+            all_results.sort(key=lambda x: x['confidence'], reverse=True)
             
             return {
                 "attribute": attribute_name,
-                "predictions": results
+                "predictions": all_results[:3]
             }
             
         except Exception as e:
             logger.error(f"Error in CLIP classification for {attribute_name}: {str(e)}")
             return {"attribute": attribute_name, "predictions": []}
-    
+
+
+
+
+
     def detect_category_and_subcategory(self, image: Image.Image) -> Tuple[str, str, str, float]:
         """
         Hierarchically detect category, subcategory, and specific product.
@@ -869,7 +935,6 @@ class VisualProcessingService:
 
 
 
-
 
 
 # # ==================== visual_processing_service_enhanced.py ====================
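Note on the classify_with_clip hunk above: the new version scores candidates in batches of 16 and applies softmax within each batch, so confidences from different batches are normalized separately and are not directly comparable. If comparable scores across all candidates are wanted, one alternative is to collect the raw logits per batch and apply softmax once at the end. A minimal sketch, assuming the same Hugging Face `CLIPModel`/`CLIPProcessor` pair used in the hunk (the function name is illustrative, not part of the service):

```python
# Sketch only: batched CLIP scoring with a single softmax over all candidates,
# so confidences stay on one scale across batches.
from typing import Dict, List

import torch
from PIL import Image
from transformers import CLIPModel, CLIPProcessor


def score_candidates_batched(
    model: CLIPModel,
    processor: CLIPProcessor,
    image: Image.Image,
    candidates: List[str],
    device: str = "cpu",
    batch_size: int = 16,
    confidence_threshold: float = 0.15,
) -> List[Dict]:
    logits = []
    for i in range(0, len(candidates), batch_size):
        batch = candidates[i:i + batch_size]
        # The image is re-encoded per batch here, matching the hunk's structure
        inputs = processor(text=batch, images=image, return_tensors="pt", padding=True)
        inputs = {k: v.to(device) for k, v in inputs.items()}
        with torch.no_grad():
            out = model(**inputs)
        # logits_per_image has shape (1, len(batch)); keep the raw logits
        logits.append(out.logits_per_image[0].cpu())

    # One softmax over every candidate, not per batch of 16
    probs = torch.cat(logits).softmax(dim=0)
    top_probs, top_idx = torch.topk(probs, k=min(3, len(candidates)))
    return [
        {"value": candidates[i.item()], "confidence": round(float(p.item()), 3)}
        for p, i in zip(top_probs, top_idx)
        if p.item() > confidence_threshold
    ]
```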

BIN
db.sqlite3


+ 1 - 1
media/generated_outputs/excel_generation_status.json

@@ -1,6 +1,6 @@
 {
     "status": "SUCCESS",
-    "timestamp": "2025-10-27T15:43:17.202230",
+    "timestamp": "2025-10-28T11:44:45.161843",
     "products_processed": 15,
     "products_successful": 15,
     "products_failed": 0,

BIN
media/generated_outputs/generated_products.xlsx


BIN
media/generated_outputs/~$generated_products.xlsx


Too many files were changed in this commit, so some files are not shown.