Harshit Pathak 3 months ago
parent
commit
afae1b63f2
4 changed files with 1204 additions and 196 deletions
  1. attr_extraction/apps.py  + 119 - 13
  2. attr_extraction/services.py  + 821 - 156
  3. attr_extraction/views.py  + 264 - 27
  4. db.sqlite3  BIN

+ 119 - 13
attr_extraction/apps.py

@@ -268,6 +268,121 @@
 
 
 
+# # ==================== attr_extraction/apps.py ====================
+# from django.apps import AppConfig
+# import logging
+# import sys
+# import os
+# import threading
+
+# from django.core.cache import cache  # ✅ Import Django cache
+
+# logger = logging.getLogger(__name__)
+
+
+# class AttrExtractionConfig(AppConfig):
+#     default_auto_field = 'django.db.models.BigAutoField'
+#     name = 'attr_extraction'
+    
+#     models_loaded = False
+    
+#     def ready(self):
+#         """
+#         🔥 Pre-load all heavy ML models during Django startup.
+#         Also clears Django cache once when the server starts.
+#         """
+#         # Skip during migrations/management commands
+#         if any(cmd in sys.argv for cmd in ['migrate', 'makemigrations', 'test', 'collectstatic', 'shell']):
+#             return
+        
+#         # Skip in Django autoreloader parent process
+#         if os.environ.get('RUN_MAIN') != 'true':
+#             logger.info("⏭️  Skipping model loading in autoreloader parent process")
+#             return
+        
+#         # ✅ Clear cache once per startup
+#         try:
+#             cache.clear()
+#             logger.info("🧹 Django cache cleared successfully on startup.")
+#         except Exception as e:
+#             logger.warning(f"⚠️  Failed to clear cache: {e}")
+        
+#         # Prevent double loading
+#         if AttrExtractionConfig.models_loaded:
+#             logger.info("⏭️  Models already loaded, skipping...")
+#             return
+        
+#         AttrExtractionConfig.models_loaded = True
+        
+#         # Load models in background thread (non-blocking)
+#         thread = threading.Thread(target=self._load_models, daemon=True)
+#         thread.start()
+        
+#         logger.info("🔄 Model loading started in background...")
+    
+#     def _load_models(self):
+#         """Background thread to load heavy models."""
+#         import time
+        
+#         logger.info("=" * 70)
+#         logger.info("🔥 WARMING UP ML MODELS (background process)")
+#         logger.info("=" * 70)
+        
+#         startup_time = time.time()
+#         total_loaded = 0
+        
+#         # 1. Sentence Transformer
+#         # try:
+#         #     logger.info("📥 Loading Sentence Transformer...")
+#         #     st_start = time.time()
+#         #     from .services import model_embedder
+#         #     st_time = time.time() - st_start
+#         #     logger.info(f"✓ Sentence Transformer ready ({st_time:.1f}s)")
+#         #     total_loaded += 1
+#         # except Exception as e:
+#         #     logger.error(f"❌ Sentence Transformer failed: {e}")
+        
+#         # 2. Pre-load CLIP model
+#         try:
+#             logger.info("📥 Loading CLIP model (20-30s)...")
+#             clip_start = time.time()
+#             from .visual_processing_service import VisualProcessingService
+#             VisualProcessingService._get_clip_model()
+#             clip_time = time.time() - clip_start
+#             logger.info(f"✓ CLIP model cached ({clip_time:.1f}s)")
+#             total_loaded += 1
+#         except Exception as e:
+#             logger.error(f"❌ CLIP model failed: {e}")
+        
+#         # 3. Pre-load OCR model
+#         try:
+#             logger.info("📥 Loading EasyOCR model...")
+#             ocr_start = time.time()
+#             from .ocr_service import OCRService
+#             ocr_service = OCRService()
+#             ocr_service._get_reader()
+#             ocr_time = time.time() - ocr_start
+#             logger.info(f"✓ OCR model cached ({ocr_time:.1f}s)")
+#             total_loaded += 1
+#         except Exception as e:
+#             logger.error(f"❌ OCR model failed: {e}")
+        
+#         total_time = time.time() - startup_time
+        
+#         logger.info("=" * 70)
+#         logger.info(f"🎉 {total_loaded}/3 MODELS LOADED in {total_time:.1f}s")
+#         logger.info("⚡ API requests are now FAST (2-5 seconds)")
+#         logger.info("=" * 70)
+
+
+
+
+
+
+
+
+
+
 # ==================== attr_extraction/apps.py ====================
 from django.apps import AppConfig
 import logging
@@ -331,18 +446,9 @@ class AttrExtractionConfig(AppConfig):
         startup_time = time.time()
         total_loaded = 0
         
-        # 1. Sentence Transformer
-        try:
-            logger.info("📥 Loading Sentence Transformer...")
-            st_start = time.time()
-            from .services import model_embedder
-            st_time = time.time() - st_start
-            logger.info(f"✓ Sentence Transformer ready ({st_time:.1f}s)")
-            total_loaded += 1
-        except Exception as e:
-            logger.error(f"❌ Sentence Transformer failed: {e}")
+        # REMOVED: Sentence Transformer (no longer used in services.py)
         
-        # 2. Pre-load CLIP model
+        # 1. Pre-load CLIP model
         try:
             logger.info("📥 Loading CLIP model (20-30s)...")
             clip_start = time.time()
@@ -354,7 +460,7 @@ class AttrExtractionConfig(AppConfig):
         except Exception as e:
             logger.error(f"❌ CLIP model failed: {e}")
         
-        # 3. Pre-load OCR model
+        # 2. Pre-load OCR model
         try:
             logger.info("📥 Loading EasyOCR model...")
             ocr_start = time.time()
@@ -370,6 +476,6 @@ class AttrExtractionConfig(AppConfig):
         total_time = time.time() - startup_time
         
         logger.info("=" * 70)
-        logger.info(f"🎉 {total_loaded}/3 MODELS LOADED in {total_time:.1f}s")
+        logger.info(f"🎉 {total_loaded}/2 MODELS LOADED in {total_time:.1f}s")
         logger.info("⚡ API requests are now FAST (2-5 seconds)")
         logger.info("=" * 70)
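
The warm-up above only pays off if each service caches its model at class level, so that `VisualProcessingService._get_clip_model()` and `OCRService()._get_reader()` are expensive on the first call and free afterwards. Those internals are not part of this diff; below is a minimal sketch of the lazy, thread-safe cache such warm-up code typically relies on. All names (`HeavyModelHolder`, `_expensive_load`) are illustrative stand-ins, not the project's actual API.

```python
import threading
import time


class HeavyModelHolder:
    _model = None                 # cached instance, shared across requests
    _lock = threading.Lock()      # guards first load (warm-up runs in a background thread)

    @classmethod
    def get_model(cls):
        """Return the cached model, loading it on the first call only."""
        if cls._model is None:
            with cls._lock:
                if cls._model is None:          # double-checked locking
                    cls._model = cls._expensive_load()
        return cls._model

    @staticmethod
    def _expensive_load():
        # Stand-in for loading CLIP or EasyOCR weights (20-30s in practice).
        time.sleep(0.1)
        return object()


# The warm-up in apps.py and later request handlers hit the same cache:
HeavyModelHolder.get_model()           # slow, once at startup
model = HeavyModelHolder.get_model()   # effectively instant thereafter
```

Because `ready()` starts this in a daemon thread, the first few API requests may still race the warm-up; the double-checked lock ensures they block on the load rather than triggering a second one.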

+ 821 - 156
attr_extraction/services.py

File diff suppressed because it is too large
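
The services.py diff itself is suppressed, but the views.py changes below imply two things about it: `extract_attributes()` gained a `user_entered_values` keyword, and `get_cache_stats()` previously reported an `embedding_cache` hit rate (the removed log line references `cache_stats['embedding_cache']['hit_rate_percent']`). A hypothetical sketch of such a stats-tracking cache follows; the class and method names are assumptions, not the actual services.py code.

```python
import hashlib
from typing import Any, Callable, Dict


class EmbeddingCache:
    """Memoizes computed values per text, tracking hits and misses."""

    def __init__(self) -> None:
        self._store: Dict[str, Any] = {}
        self._hits = 0
        self._misses = 0

    @staticmethod
    def _key(text: str) -> str:
        # Hash the text so arbitrarily long inputs make fixed-size keys.
        return hashlib.sha256(text.encode("utf-8")).hexdigest()

    def get_or_compute(self, text: str, compute: Callable[[str], Any]) -> Any:
        key = self._key(text)
        if key in self._store:
            self._hits += 1
        else:
            self._misses += 1
            self._store[key] = compute(text)
        return self._store[key]

    def stats(self) -> Dict[str, Any]:
        total = self._hits + self._misses
        rate = (100.0 * self._hits / total) if total else 0.0
        return {"hits": self._hits, "misses": self._misses,
                "hit_rate_percent": round(rate, 1)}


cache = EmbeddingCache()
cache.get_or_compute("red shirt", lambda t: [0.1, 0.2])  # miss: computes
cache.get_or_compute("red shirt", lambda t: [0.1, 0.2])  # hit: cached
print(cache.stats())  # {'hits': 1, 'misses': 1, 'hit_rate_percent': 50.0}
```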


+ 264 - 27
attr_extraction/views.py

@@ -174,10 +174,260 @@ class ExtractProductAttributesView(APIView):
 
 # ==================== OPTIMIZED BATCH VIEW ====================
 
+# class BatchExtractProductAttributesView(APIView):
+#     """
+#     ⚡ PERFORMANCE OPTIMIZED: Batch extraction with intelligent parallelization
+#     Expected performance: 10 products in 30-60 seconds (with image processing)
+#     """
+
+#     def post(self, request):
+#         import time
+#         start_time = time.time()
+
+#         serializer = BatchProductRequestSerializer(data=request.data)
+#         if not serializer.is_valid():
+#             return Response({"error": serializer.errors}, status=status.HTTP_400_BAD_REQUEST)
+
+#         validated_data = serializer.validated_data
+#         product_list = validated_data.get("products", [])
+        
+#         logger.info(f"🚀 Starting batch processing for {len(product_list)} products")
+        
+#         # ==================== OPTIMIZATION 1: Bulk DB Query ====================
+#         item_ids = [p['item_id'] for p in product_list]
+#         products_queryset = Product.objects.filter(
+#             item_id__in=item_ids
+#         ).prefetch_related('attribute_values')
+        
+#         product_map = {product.item_id: product for product in products_queryset}
+        
+#         # Prefetch ALL original attribute values in ONE query
+#         original_values_qs = ProductAttributeValue.objects.filter(
+#             product__item_id__in=item_ids
+#         ).select_related('product')
+        
+#         original_values_map = {}
+#         for attr_val in original_values_qs:
+#             item_id = attr_val.product.item_id
+#             if item_id not in original_values_map:
+#                 original_values_map[item_id] = {}
+#             original_values_map[item_id][attr_val.attribute_name] = attr_val.original_value
+        
+#         logger.info(f"✓ Loaded {len(product_map)} products from database")
+        
+#         # Extract settings
+#         model = validated_data.get("model")
+#         extract_additional = validated_data.get("extract_additional", True)
+#         process_image = validated_data.get("process_image", True)
+#         multiple = validated_data.get("multiple", [])
+#         threshold_abs = validated_data.get("threshold_abs", 0.65)
+#         margin = validated_data.get("margin", 0.15)
+#         use_dynamic_thresholds = validated_data.get("use_dynamic_thresholds", False)
+#         use_adaptive_margin = validated_data.get("use_adaptive_margin", False)
+#         use_semantic_clustering = validated_data.get("use_semantic_clustering", False)
+        
+#         results = []
+#         successful = 0
+#         failed = 0
+        
+#         # ==================== OPTIMIZATION 2: Conditional Service Init ====================
+#         # Only initialize if processing images
+#         ocr_service = None
+#         visual_service = None
+        
+#         if process_image:
+#             from .ocr_service import OCRService
+#             from .visual_processing_service import VisualProcessingService
+#             ocr_service = OCRService()
+#             visual_service = VisualProcessingService()
+#             logger.info("✓ Image processing services initialized")
+
+#         # ==================== OPTIMIZATION 3: Smart Parallelization ====================
+#         def process_single_product(product_entry):
+#             """Process a single product (runs in parallel)"""
+#             import time
+#             product_start = time.time()
+            
+#             item_id = product_entry['item_id']
+#             mandatory_attrs = product_entry['mandatory_attrs']
+
+#             if item_id not in product_map:
+#                 return {
+#                     "product_id": item_id,
+#                     "error": "Product not found in database"
+#                 }, False
+
+#             product = product_map[item_id]
+            
+#             try:
+#                 title = product.product_name
+#                 short_desc = product.product_short_description
+#                 long_desc = product.product_long_description
+#                 image_url = product.image_path
+                
+#                 ocr_results = None
+#                 ocr_text = None
+#                 visual_results = None
+
+#                 # ⚡ SKIP IMAGE PROCESSING IF DISABLED (HUGE TIME SAVER)
+#                 if process_image and image_url:
+#                     if ocr_service:
+#                         ocr_results = ocr_service.process_image(image_url)
+                        
+#                         if ocr_results and ocr_results.get("detected_text"):
+#                             ocr_attrs = ProductAttributeService.extract_attributes_from_ocr(
+#                                 ocr_results, model
+#                             )
+#                             ocr_results["extracted_attributes"] = ocr_attrs
+#                             ocr_text = "\n".join([
+#                                 f"{item['text']} (confidence: {item['confidence']:.2f})"
+#                                 for item in ocr_results["detected_text"]
+#                             ])
+                    
+#                     if visual_service:
+#                         product_type_hint = product.product_type if hasattr(product, 'product_type') else None
+#                         visual_results = visual_service.process_image(image_url, product_type_hint)
+                        
+#                         if visual_results and visual_results.get('visual_attributes'):
+#                             visual_results['visual_attributes'] = ProductAttributeService.format_visual_attributes(
+#                                 visual_results['visual_attributes']
+#                             )
+
+#                 # Combine product text with source tracking
+#                 product_text, source_map = ProductAttributeService.combine_product_text(
+#                     title=title,
+#                     short_desc=short_desc,
+#                     long_desc=long_desc,
+#                     ocr_text=ocr_text
+#                 )
+
+#                 # ⚡ EXTRACT ATTRIBUTES WITH CACHING ENABLED
+#                 extracted = ProductAttributeService.extract_attributes(
+#                     product_text=product_text,
+#                     mandatory_attrs=mandatory_attrs,
+#                     source_map=source_map,
+#                     model=model,
+#                     extract_additional=extract_additional,
+#                     multiple=multiple,
+#                     # threshold_abs=threshold_abs,
+#                     # margin=margin,
+#                     # use_dynamic_thresholds=use_dynamic_thresholds,
+#                     # use_adaptive_margin=use_adaptive_margin,
+#                     # use_semantic_clustering=use_semantic_clustering,
+#                     use_cache=True  # ⚡ CRITICAL: Enable caching
+#                 )
+
+#                 # Add original values
+#                 original_attrs = original_values_map.get(item_id, {})
+                
+#                 for attr_name, attr_values in extracted.get("mandatory", {}).items():
+#                     if isinstance(attr_values, list):
+#                         for attr_obj in attr_values:
+#                             if isinstance(attr_obj, dict):
+#                                 attr_obj["original_value"] = original_attrs.get(attr_name, "")
+                
+#                 for attr_name, attr_values in extracted.get("additional", {}).items():
+#                     if isinstance(attr_values, list):
+#                         for attr_obj in attr_values:
+#                             if isinstance(attr_obj, dict):
+#                                 attr_obj["original_value"] = original_attrs.get(attr_name, "")
+
+#                 result = {
+#                     "product_id": product.item_id,
+#                     "mandatory": extracted.get("mandatory", {}),
+#                     "additional": extracted.get("additional", {}),
+#                 }
+
+#                 if ocr_results:
+#                     result["ocr_results"] = ocr_results
+                
+#                 if visual_results:
+#                     result["visual_results"] = visual_results
+                
+#                 processing_time = time.time() - product_start
+#                 logger.info(f"✓ Processed {item_id} in {processing_time:.2f}s")
+
+#                 return result, True
+
+#             except Exception as e:
+#                 logger.error(f"❌ Error processing {item_id}: {str(e)}")
+#                 return {
+#                     "product_id": item_id,
+#                     "error": str(e)
+#                 }, False
+
+#         # ==================== OPTIMIZATION 4: Parallel Execution ====================
+#         # Adjust workers based on whether image processing is enabled
+#         max_workers = min(3 if process_image else 10, len(product_list))
+        
+#         logger.info(f"⚡ Using {max_workers} parallel workers")
+        
+#         with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
+#             # Submit all tasks
+#             future_to_product = {
+#                 executor.submit(process_single_product, product): product
+#                 for product in product_list
+#             }
+            
+#             # Collect results as they complete
+#             for future in concurrent.futures.as_completed(future_to_product):
+#                 try:
+#                     result, success = future.result()
+#                     results.append(result)
+#                     if success:
+#                         successful += 1
+#                     else:
+#                         failed += 1
+#                 except Exception as e:
+#                     failed += 1
+#                     logger.error(f"❌ Future execution error: {str(e)}")
+#                     results.append({
+#                         "product_id": "unknown",
+#                         "error": str(e)
+#                     })
+
+#         total_time = time.time() - start_time
+        
+#         # Get cache statistics
+#         cache_stats = ProductAttributeService.get_cache_stats()
+        
+#         logger.info(f"""
+# 🎉 BATCH PROCESSING COMPLETE
+# ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
+#   Total products: {len(product_list)}
+#   Successful: {successful}
+#   Failed: {failed}
+#   Total time: {total_time:.2f}s
+#   Avg time/product: {total_time/len(product_list):.2f}s
+# ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
+#         """)
+
+#         batch_result = {
+#             "results": results,
+#             "total_products": len(product_list),
+#             "successful": successful,
+#             "failed": failed,
+#             "performance": {
+#                 "total_time_seconds": round(total_time, 2),
+#                 "avg_time_per_product": round(total_time / len(product_list), 2),
+#                 "workers_used": max_workers
+#             },
+#             "cache_stats": cache_stats
+#         }
+
+#         response_serializer = BatchProductResponseSerializer(data=batch_result)
+#         if response_serializer.is_valid():
+#             return Response(response_serializer.data, status=status.HTTP_200_OK)
+
+#         return Response(batch_result, status=status.HTTP_200_OK)
+
+
+# VERSION WITH PARALLELIZATION
 class BatchExtractProductAttributesView(APIView):
     """
     ⚡ PERFORMANCE OPTIMIZED: Batch extraction with intelligent parallelization
     Expected performance: 10 products in 30-60 seconds (with image processing)
+    NOW WITH USER VALUE REASONING
     """
 
     def post(self, request):
@@ -214,6 +464,7 @@ class BatchExtractProductAttributesView(APIView):
             original_values_map[item_id][attr_val.attribute_name] = attr_val.original_value
         
         logger.info(f"✓ Loaded {len(product_map)} products from database")
+        logger.info(f"✓ Loaded user values for {len(original_values_map)} products")
         
         # Extract settings
         model = validated_data.get("model")
@@ -231,7 +482,6 @@ class BatchExtractProductAttributesView(APIView):
         failed = 0
         
         # ==================== OPTIMIZATION 2: Conditional Service Init ====================
-        # Only initialize if processing images
         ocr_service = None
         visual_service = None
         
@@ -269,7 +519,7 @@ class BatchExtractProductAttributesView(APIView):
                 ocr_text = None
                 visual_results = None
 
-                # ⚡ SKIP IMAGE PROCESSING IF DISABLED (HUGE TIME SAVER)
+                # ⚡ SKIP IMAGE PROCESSING IF DISABLED
                 if process_image and image_url:
                     if ocr_service:
                         ocr_results = ocr_service.process_image(image_url)
@@ -301,7 +551,13 @@ class BatchExtractProductAttributesView(APIView):
                     ocr_text=ocr_text
                 )
 
-                # ⚡ EXTRACT ATTRIBUTES WITH CACHING ENABLED
+                # 🆕 GET USER-ENTERED VALUES FOR THIS PRODUCT
+                user_entered_values = original_values_map.get(item_id, {})
+                print("user entered values are ")
+                print(user_entered_values)
+                logger.info(f"Processing {item_id} with {len(user_entered_values)} user-entered values")
+
+                # ⚡ EXTRACT ATTRIBUTES WITH USER VALUES AND REASONING
                 extracted = ProductAttributeService.extract_attributes(
                     product_text=product_text,
                     mandatory_attrs=mandatory_attrs,
@@ -309,29 +565,13 @@ class BatchExtractProductAttributesView(APIView):
                     model=model,
                     extract_additional=extract_additional,
                     multiple=multiple,
-                    # threshold_abs=threshold_abs,
-                    # margin=margin,
-                    # use_dynamic_thresholds=use_dynamic_thresholds,
-                    # use_adaptive_margin=use_adaptive_margin,
-                    # use_semantic_clustering=use_semantic_clustering,
-                    use_cache=True  # ⚡ CRITICAL: Enable caching
+                    use_cache=True,
+                    user_entered_values=user_entered_values  # 🆕 PASS USER VALUES
                 )
 
-                # Add original values
-                original_attrs = original_values_map.get(item_id, {})
-                
-                for attr_name, attr_values in extracted.get("mandatory", {}).items():
-                    if isinstance(attr_values, list):
-                        for attr_obj in attr_values:
-                            if isinstance(attr_obj, dict):
-                                attr_obj["original_value"] = original_attrs.get(attr_name, "")
+                # NOTE: Original values are now part of LLM response with reasoning
+                # No need to add them separately - they're already in the "user_value" field
                 
-                for attr_name, attr_values in extracted.get("additional", {}).items():
-                    if isinstance(attr_values, list):
-                        for attr_obj in attr_values:
-                            if isinstance(attr_obj, dict):
-                                attr_obj["original_value"] = original_attrs.get(attr_name, "")
-
                 result = {
                     "product_id": product.item_id,
                     "mandatory": extracted.get("mandatory", {}),
@@ -357,19 +597,16 @@ class BatchExtractProductAttributesView(APIView):
                 }, False
 
         # ==================== OPTIMIZATION 4: Parallel Execution ====================
-        # Adjust workers based on whether image processing is enabled
         max_workers = min(3 if process_image else 10, len(product_list))
         
         logger.info(f"⚡ Using {max_workers} parallel workers")
         
         with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
-            # Submit all tasks
             future_to_product = {
                 executor.submit(process_single_product, product): product
                 for product in product_list
             }
             
-            # Collect results as they complete
             for future in concurrent.futures.as_completed(future_to_product):
                 try:
                     result, success = future.result()
@@ -399,7 +636,6 @@ class BatchExtractProductAttributesView(APIView):
   Failed: {failed}
   Total time: {total_time:.2f}s
   Avg time/product: {total_time/len(product_list):.2f}s
-  Cache hit rate: {cache_stats['embedding_cache']['hit_rate_percent']:.1f}%
 ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
         """)
 
@@ -423,6 +659,7 @@ class BatchExtractProductAttributesView(APIView):
         return Response(batch_result, status=status.HTTP_200_OK)
 
 
+
 class ProductListView(APIView):
     """
     GET API to list all products with details
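
For reference, the fan-out pattern the batch view settles on distills to a few lines of stdlib code. This standalone sketch uses a trivial stand-in for `process_single_product`; it shows the bounded pool, the future-to-input map, and `as_completed` collection. Keeping that map is also what would let a crashed future report its real item_id rather than the `"unknown"` the view falls back to.

```python
import concurrent.futures

def process_single_product(entry):
    # Stand-in: the real closure does OCR/CLIP/LLM work and returns
    # (result_dict, success_flag).
    return {"product_id": entry["item_id"]}, True

product_list = [{"item_id": f"SKU-{i}"} for i in range(10)]
process_image = False

# Fewer workers when each task is heavy (image processing), more when it is
# text-only; never more workers than tasks.
max_workers = min(3 if process_image else 10, len(product_list))

results, successful, failed = [], 0, 0
with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
    future_to_product = {
        executor.submit(process_single_product, p): p for p in product_list
    }
    # Collect in completion order so one slow product never blocks the rest.
    for future in concurrent.futures.as_completed(future_to_product):
        try:
            result, ok = future.result()
            results.append(result)
            successful += ok
            failed += not ok
        except Exception as exc:
            failed += 1
            results.append({
                "product_id": future_to_product[future]["item_id"],
                "error": str(exc),
            })

print(f"{successful} ok, {failed} failed")
```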

BIN
db.sqlite3


Some files were not shown because too many files changed in this diff