فهرست منبع

Threshold added

Student Yadav 3 ماه پیش
والد
کامیت
c3d9c6977f
3 فایلهای تغییر یافته به همراه 319 افزوده شده و 74 حذف شده
  1. 179 31
      attr_extraction/serializers.py
  2. 122 42
      attr_extraction/services.py
  3. 18 1
      attr_extraction/views.py

+ 179 - 31
attr_extraction/serializers.py

@@ -1,3 +1,9 @@
+
+
+
+
+
+# # ==================== serializers.py ====================
 # from rest_framework import serializers
 
 # class ProductInputSerializer(serializers.Serializer):
@@ -6,19 +12,36 @@
 #     title = serializers.CharField(required=False, allow_blank=True, allow_null=True)
 #     short_desc = serializers.CharField(required=False, allow_blank=True, allow_null=True)
 #     long_desc = serializers.CharField(required=False, allow_blank=True, allow_null=True)
+#     image_url = serializers.URLField(required=False, allow_blank=True, allow_null=True)
+
+# class MandatoryAttrsField(serializers.DictField):
+#     """Custom DictField to validate mandatory_attrs structure."""
+#     child = serializers.ListField(child=serializers.CharField())
+
+# class ProductBatchInputSerializer(serializers.Serializer):
+#     """Serializer for an individual product input within the batch request."""
+#     item_id = serializers.CharField(required=True)
+#     mandatory_attrs = MandatoryAttrsField(
+#         required=True,
+#         help_text="A dictionary of attribute names and their possible values."
+#     )
+#     # You can also allow per-product model/flags if needed, but keeping it batch-level for simplicity here.
 
 
 # class SingleProductRequestSerializer(serializers.Serializer):
 #     """Serializer for single product extraction request."""
-#     title = serializers.CharField(required=False, allow_blank=True, allow_null=True)
-#     short_desc = serializers.CharField(required=False, allow_blank=True, allow_null=True)
-#     long_desc = serializers.CharField(required=False, allow_blank=True, allow_null=True)
+#     # title = serializers.CharField(required=False, allow_blank=True, allow_null=True)
+#     # short_desc = serializers.CharField(required=False, allow_blank=True, allow_null=True)
+#     # long_desc = serializers.CharField(required=False, allow_blank=True, allow_null=True)
+#     # image_url = serializers.URLField(required=False, allow_blank=True, allow_null=True)
+#     item_id = serializers.CharField(required=True)
 #     mandatory_attrs = serializers.DictField(
 #         child=serializers.ListField(child=serializers.CharField()),
 #         required=True
 #     )
 #     model = serializers.CharField(required=False, default="llama-3.1-8b-instant")
 #     extract_additional = serializers.BooleanField(required=False, default=True)
+#     process_image = serializers.BooleanField(required=False, default=True)
 
 #     def validate_model(self, value):
 #         from django.conf import settings
@@ -29,20 +52,19 @@
 #         return value
 
 
+
 # class BatchProductRequestSerializer(serializers.Serializer):
-#     """Serializer for batch product extraction request."""
+#     """Serializer for batch product extraction request (with item-specific attributes)."""
 #     products = serializers.ListField(
-#         child=ProductInputSerializer(),
+#         child=ProductBatchInputSerializer(), # <--- Changed
 #         required=True,
 #         min_length=1
 #     )
-#     mandatory_attrs = serializers.DictField(
-#         child=serializers.ListField(child=serializers.CharField()),
-#         required=True
-#     )
 #     model = serializers.CharField(required=False, default="llama-3.1-8b-instant")
 #     extract_additional = serializers.BooleanField(required=False, default=True)
-
+#     process_image = serializers.BooleanField(required=False, default=True)
+    
+#     # ... validate_model method ...
 #     def validate_model(self, value):
 #         from django.conf import settings
 #         if value not in settings.SUPPORTED_MODELS:
@@ -50,7 +72,8 @@
 #                 f"Model must be one of {settings.SUPPORTED_MODELS}"
 #             )
 #         return value
-
+    
+#     # ... validate_products method (updated to use products instead of item_ids) ...
 #     def validate_products(self, value):
 #         from django.conf import settings
 #         max_size = getattr(settings, 'MAX_BATCH_SIZE', 100)
@@ -60,12 +83,18 @@
 #             )
 #         return value
 
+# class OCRResultSerializer(serializers.Serializer):
+#     """Serializer for OCR results."""
+#     detected_text = serializers.ListField(child=serializers.DictField())
+#     extracted_attributes = serializers.DictField()
+
 
 # class ProductAttributeResultSerializer(serializers.Serializer):
 #     """Serializer for individual product extraction result."""
 #     product_id = serializers.CharField(required=False)
 #     mandatory = serializers.DictField()
 #     additional = serializers.DictField(required=False)
+#     ocr_results = OCRResultSerializer(required=False)
 #     error = serializers.CharField(required=False)
 #     raw_output = serializers.CharField(required=False)
 
@@ -76,6 +105,101 @@
 #     total_products = serializers.IntegerField()
 #     successful = serializers.IntegerField()
 #     failed = serializers.IntegerField()
+
+
+
+
+# from rest_framework import serializers
+# from .models import Product
+
+# class ProductSerializer(serializers.ModelSerializer):
+#     product_type_details = serializers.SerializerMethodField()
+    
+#     class Meta:
+#         model = Product
+#         fields = [
+#             'id',
+#             'item_id',
+#             'product_name',
+#             'product_long_description',
+#             'product_short_description',
+#             'product_type',
+#             'image_path',
+#             'image',
+#             'product_type_details',  # new field
+#         ]
+
+#     def get_product_type_details(self, obj):
+#         # Fetch ProductType object for this product
+#         try:
+#             product_type = ProductType.objects.get(name=obj.product_type)
+#         except ProductType.DoesNotExist:
+#             return []
+
+#         # Serialize its attributes
+#         attributes = ProductAttribute.objects.filter(product_type=product_type)
+#         return [
+#             {
+#                 "attribute_name": attr.name,
+#                 "is_mandatory": "Yes" if attr.is_mandatory else "No",
+#                 "possible_values": [pv.value for pv in attr.possible_values.all()]
+#             }
+#             for attr in attributes
+#         ]
+
+
+
+# from rest_framework import serializers
+# from .models import Product, ProductType, ProductAttribute, AttributePossibleValue
+
+# class AttributePossibleValueSerializer(serializers.ModelSerializer):
+#     class Meta:
+#         model = AttributePossibleValue
+#         fields = ['value']
+
+# class ProductAttributeSerializer(serializers.ModelSerializer):
+#     possible_values = AttributePossibleValueSerializer(many=True, read_only=True)
+    
+#     class Meta:
+#         model = ProductAttribute
+#         fields = ['name', 'is_mandatory', 'possible_values']
+
+# class ProductTypeSerializer(serializers.ModelSerializer):
+#     attributes = ProductAttributeSerializer(many=True, read_only=True)
+    
+#     class Meta:
+#         model = ProductType
+#         fields = ['name', 'attributes']
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
 
 
 
@@ -86,6 +210,8 @@
 
 # ==================== serializers.py ====================
 from rest_framework import serializers
+from .models import Product, ProductType, ProductAttribute, AttributePossibleValue
+
 
 class ProductInputSerializer(serializers.Serializer):
     """Serializer for individual product input."""
@@ -95,10 +221,12 @@ class ProductInputSerializer(serializers.Serializer):
     long_desc = serializers.CharField(required=False, allow_blank=True, allow_null=True)
     image_url = serializers.URLField(required=False, allow_blank=True, allow_null=True)
 
+
 class MandatoryAttrsField(serializers.DictField):
     """Custom DictField to validate mandatory_attrs structure."""
     child = serializers.ListField(child=serializers.CharField())
 
+
 class ProductBatchInputSerializer(serializers.Serializer):
     """Serializer for an individual product input within the batch request."""
     item_id = serializers.CharField(required=True)
@@ -106,15 +234,10 @@ class ProductBatchInputSerializer(serializers.Serializer):
         required=True,
         help_text="A dictionary of attribute names and their possible values."
     )
-    # You can also allow per-product model/flags if needed, but keeping it batch-level for simplicity here.
 
 
 class SingleProductRequestSerializer(serializers.Serializer):
     """Serializer for single product extraction request."""
-    # title = serializers.CharField(required=False, allow_blank=True, allow_null=True)
-    # short_desc = serializers.CharField(required=False, allow_blank=True, allow_null=True)
-    # long_desc = serializers.CharField(required=False, allow_blank=True, allow_null=True)
-    # image_url = serializers.URLField(required=False, allow_blank=True, allow_null=True)
     item_id = serializers.CharField(required=True)
     mandatory_attrs = serializers.DictField(
         child=serializers.ListField(child=serializers.CharField()),
@@ -123,6 +246,17 @@ class SingleProductRequestSerializer(serializers.Serializer):
     model = serializers.CharField(required=False, default="llama-3.1-8b-instant")
     extract_additional = serializers.BooleanField(required=False, default=True)
     process_image = serializers.BooleanField(required=False, default=True)
+    multiple = serializers.ListField(
+        child=serializers.CharField(),
+        required=False,
+        default=list,
+        help_text="List of attribute names that can have multiple values"
+    )
+    threshold_abs = serializers.FloatField(default=0.65, required=False)
+    margin = serializers.FloatField(default=0.15, required=False)
+    use_dynamic_thresholds = serializers.BooleanField(default=True, required=False)
+    use_adaptive_margin = serializers.BooleanField(default=True, required=False)
+    use_semantic_clustering = serializers.BooleanField(default=True, required=False)
 
     def validate_model(self, value):
         from django.conf import settings
@@ -133,19 +267,28 @@ class SingleProductRequestSerializer(serializers.Serializer):
         return value
 
 
-
 class BatchProductRequestSerializer(serializers.Serializer):
     """Serializer for batch product extraction request (with item-specific attributes)."""
     products = serializers.ListField(
-        child=ProductBatchInputSerializer(), # <--- Changed
+        child=ProductBatchInputSerializer(),
         required=True,
         min_length=1
     )
     model = serializers.CharField(required=False, default="llama-3.1-8b-instant")
     extract_additional = serializers.BooleanField(required=False, default=True)
     process_image = serializers.BooleanField(required=False, default=True)
+    multiple = serializers.ListField(
+        child=serializers.CharField(),
+        required=False,
+        default=list,
+        help_text="List of attribute names that can have multiple values"
+    )
+    threshold_abs = serializers.FloatField(default=0.65, required=False)
+    margin = serializers.FloatField(default=0.15, required=False)
+    use_dynamic_thresholds = serializers.BooleanField(default=True, required=False)
+    use_adaptive_margin = serializers.BooleanField(default=True, required=False)
+    use_semantic_clustering = serializers.BooleanField(default=True, required=False)
     
-    # ... validate_model method ...
     def validate_model(self, value):
         from django.conf import settings
         if value not in settings.SUPPORTED_MODELS:
@@ -154,7 +297,6 @@ class BatchProductRequestSerializer(serializers.Serializer):
             )
         return value
     
-    # ... validate_products method (updated to use products instead of item_ids) ...
     def validate_products(self, value):
         from django.conf import settings
         max_size = getattr(settings, 'MAX_BATCH_SIZE', 100)
@@ -164,6 +306,7 @@ class BatchProductRequestSerializer(serializers.Serializer):
             )
         return value
 
+
 class OCRResultSerializer(serializers.Serializer):
     """Serializer for OCR results."""
     detected_text = serializers.ListField(child=serializers.DictField())
@@ -188,12 +331,8 @@ class BatchProductResponseSerializer(serializers.Serializer):
     failed = serializers.IntegerField()
 
 
-
-
-from rest_framework import serializers
-from .models import Product
-
 class ProductSerializer(serializers.ModelSerializer):
+    """Serializer for Product model with product type details."""
     product_type_details = serializers.SerializerMethodField()
     
     class Meta:
@@ -207,11 +346,11 @@ class ProductSerializer(serializers.ModelSerializer):
             'product_type',
             'image_path',
             'image',
-            'product_type_details',  # new field
+            'product_type_details',
         ]
 
     def get_product_type_details(self, obj):
-        # Fetch ProductType object for this product
+        """Fetch ProductType object and its attributes for this product."""
         try:
             product_type = ProductType.objects.get(name=obj.product_type)
         except ProductType.DoesNotExist:
@@ -229,25 +368,34 @@ class ProductSerializer(serializers.ModelSerializer):
         ]
 
 
-
-from rest_framework import serializers
-from .models import Product, ProductType, ProductAttribute, AttributePossibleValue
-
 class AttributePossibleValueSerializer(serializers.ModelSerializer):
+    """Serializer for AttributePossibleValue model."""
     class Meta:
         model = AttributePossibleValue
         fields = ['value']
 
+
 class ProductAttributeSerializer(serializers.ModelSerializer):
+    """Serializer for ProductAttribute model with possible values."""
     possible_values = AttributePossibleValueSerializer(many=True, read_only=True)
     
     class Meta:
         model = ProductAttribute
         fields = ['name', 'is_mandatory', 'possible_values']
 
+
 class ProductTypeSerializer(serializers.ModelSerializer):
+    """Serializer for ProductType model with attributes."""
     attributes = ProductAttributeSerializer(many=True, read_only=True)
     
     class Meta:
         model = ProductType
         fields = ['name', 'attributes']
+
+
+
+
+
+
+
+        

+ 122 - 42
attr_extraction/services.py

@@ -366,17 +366,32 @@ If an attribute is not present, do not include it in the response.
             scores.sort(key=lambda x: x[1], reverse=True)
             best_val, best_score = scores[0]
 
+            # DEBUG: Print scores
+            print(f"\n{'='*80}")
+            print(f"Attribute: {attr}")
+            print(f"{'='*80}")
+            print(f"Top 5 Scores:")
+            for i, (val, sc) in enumerate(scores[:5]):
+                print(f"  {i+1}. {val}: {sc:.4f}")
+            print(f"\nBest: {best_val} (score: {best_score:.4f})")
+            print(f"Base Threshold: {threshold_abs}")
+            print(f"Base Margin: {margin}")
+
             # Calculate adaptive margin if enabled
             effective_margin = margin
             if allow_multiple and use_adaptive_margin:
                 effective_margin = ProductAttributeService.get_adaptive_margin(scores, margin)
+                print(f"Adaptive Margin: {effective_margin}")
 
             if not allow_multiple:
                 source = ProductAttributeService.find_value_source(best_val, source_map)
                 extracted[attr] = [{"value": best_val, "source": source}]
+                print(f"Single value mode - Selected: {best_val}")
             else:
+                print(f"\nMultiple value mode enabled")
                 candidates = [best_val]
                 use_base_threshold = best_score >= threshold_abs
+                print(f"Use base threshold: {use_base_threshold} (best_score >= {threshold_abs})")
 
                 # Get semantic clusters if enabled
                 clusters = []
@@ -385,7 +400,12 @@ If an attribute is not present, do not include it in the response.
                         allowed_values, scores, cluster_threshold=0.4
                     )
                     best_cluster = next((c for c in clusters if best_val in c), [best_val])
+                    print(f"\nSemantic Clusters:")
+                    for idx, cluster in enumerate(clusters):
+                        marker = " <- BEST" if best_val in cluster else ""
+                        print(f"  Cluster {idx+1}: {cluster}{marker}")
 
+                print(f"\nEvaluating additional candidates:")
                 for val, sc in scores[1:]:
                     # Calculate dynamic threshold for this value
                     if use_dynamic_thresholds and extracted_attrs:
@@ -404,28 +424,68 @@ If an attribute is not present, do not include it in the response.
                     if use_semantic_clustering and clusters:
                         in_cluster = any(best_val in c and val in c for c in clusters)
 
+                    # DEBUG: Print candidate evaluation
+                    print(f"\n  Candidate: {val}")
+                    print(f"    Score: {sc:.4f}")
+                    print(f"    Margin diff: {best_score - sc:.4f} (within_margin: {within_margin})")
+                    print(f"    Dynamic threshold: {dynamic_thresh:.4f} (above_threshold: {above_threshold})")
+                    print(f"    In cluster with best: {in_cluster}")
+
+                    # Candidate selection for multi-value mode: decide whether this value
+                    # joins the best one, recording the reason for the debug output below.
+                    include_candidate = False
+                    reason = ""
+
+                    # Calculate score ratio (how close to best score)
+                    score_ratio = sc / best_score if best_score > 0 else 0
+
                     if use_base_threshold:
-                        # Best score is good, require threshold OR (cluster + margin)
+                        # Best score is good (>= threshold), be selective
                         if above_threshold and within_margin:
-                            candidates.append(val)
-                        elif in_cluster and within_margin:
-                            candidates.append(val)
+                            include_candidate = True
+                            reason = "above threshold AND within margin"
+                        elif in_cluster and within_margin and score_ratio >= 0.75:
+                            # Only include cluster members if they're close in score
+                            include_candidate = True
+                            reason = "in cluster AND within margin with good score ratio"
                     else:
-                        # Best score is low, use margin OR cluster logic
-                        if within_margin:
-                            candidates.append(val)
-                        elif in_cluster and (best_score - sc) <= effective_margin * 2.0:
-                            # Extended margin for cluster members
-                            candidates.append(val)
+                        # Best score is low (< threshold), be more careful
+                        # Only include candidates that are very close to the best score
+                        if within_margin and score_ratio >= 0.80:
+                            # Must be at least 80% of best score
+                            include_candidate = True
+                            reason = "within margin with strong score ratio"
+                        elif in_cluster and within_margin and score_ratio >= 0.85:
+                            # Cluster members need even higher ratio when best score is low
+                            include_candidate = True
+                            reason = "in cluster with tight margin and high score ratio"
+
+                    # Additional filter: Never include "Not Specified" if we have better options
+                    if include_candidate and val.lower() in ["not specified", "not_specified", "unspecified"]:
+                        # Keep the placeholder only when no other candidate has been added yet
+                        # and its score is at least 95% of the best score; otherwise drop it.
+                        if len(candidates) > 1 or (sc < best_score * 0.95):
+                            include_candidate = False
+                            reason = "excluded: 'Not Specified' with better alternatives"
+
+                    if include_candidate:
+                        candidates.append(val)
+                        print(f"    ✓ INCLUDED - Reason: {reason}")
+                    else:
+                        print(f"    ✗ EXCLUDED")
 
                 # Map each candidate to its source and create array format
                 extracted[attr] = []
+                print(f"\nFinal candidates for {attr}: {candidates}")
                 for candidate in candidates:
                     source = ProductAttributeService.find_value_source(candidate, source_map)
                     extracted[attr].append({"value": candidate, "source": source})
+                    print(f"  - {candidate} (source: {source})")
+
+            print(f"{'='*80}\n")
 
         return extracted
 
+
     @staticmethod
     def extract_attributes(
         product_text: str,
@@ -454,6 +514,19 @@ If an attribute is not present, do not include it in the response.
         if source_map is None:
             source_map = {}
 
+        # DEBUG: Print what we received
+        print("\n" + "="*80)
+        print("EXTRACT ATTRIBUTES - INPUT PARAMETERS")
+        print("="*80)
+        print(f"Product text length: {len(product_text)}")
+        print(f"Mandatory attrs: {list(mandatory_attrs.keys())}")
+        print(f"Multiple mode for: {multiple}")
+        print(f"Threshold: {threshold_abs}, Margin: {margin}")
+        print(f"Dynamic thresholds: {use_dynamic_thresholds}")
+        print(f"Adaptive margin: {use_adaptive_margin}")
+        print(f"Semantic clustering: {use_semantic_clustering}")
+        print("="*80 + "\n")
+
         # Check if product text is empty or minimal
         if not product_text or product_text == "No product information available":
             return ProductAttributeService._create_error_response(
@@ -471,12 +544,12 @@ If an attribute is not present, do not include it in the response.
         additional_instruction = ""
         if extract_additional:
             additional_instruction = """
-2. Extract ADDITIONAL attributes: Identify any other relevant attributes from the product text 
-   that are NOT in the mandatory list. Only include attributes where you can find actual values
-   in the product text. Do NOT include attributes with "Not Specified" or empty values.
-   
-   Examples of attributes to look for (only if present): Brand, Material, Size, Color, Dimensions,
-   Weight, Features, Style, Theme, Pattern, Finish, Care Instructions, etc."""
+    2. Extract ADDITIONAL attributes: Identify any other relevant attributes from the product text 
+    that are NOT in the mandatory list. Only include attributes where you can find actual values
+    in the product text. Do NOT include attributes with "Not Specified" or empty values.
+    
+    Examples of attributes to look for (only if present): Brand, Material, Size, Color, Dimensions,
+    Weight, Features, Style, Theme, Pattern, Finish, Care Instructions, etc."""
 
         output_format = {
             "mandatory": {attr: "value or list of values" for attr in mandatory_attrs.keys()},
@@ -490,32 +563,32 @@ If an attribute is not present, do not include it in the response.
             output_format["additional"]["_note"] = "Only include attributes with actual values found in text"
 
         prompt = f"""
-You are an intelligent product attribute extractor that works with ANY product type.
-
-TASK:
-1. Extract MANDATORY attributes: For each mandatory attribute, select the most appropriate value(s)
-   from the provided list. Choose the value(s) that best match the product description.
-{additional_instruction}
-
-Product Text:
-{product_text}
-
-Mandatory Attribute Lists (MUST select from these allowed values):
-{mandatory_attr_text}
-
-CRITICAL INSTRUCTIONS:
-- Return ONLY valid JSON, nothing else
-- No explanations, no markdown, no text before or after the JSON
-- For mandatory attributes, choose the value(s) from the provided list that best match
-- If a mandatory attribute cannot be determined from the product text, use "Not Specified"
-- Prefer exact matches from the allowed values list over generic synonyms
-- If multiple values are plausible, you MAY return more than one
-{f"- For additional attributes: ONLY include attributes where you found actual values in the product text. DO NOT include attributes with 'Not Specified', 'None', 'N/A', or empty values. If you cannot find a value for an attribute, simply don't include that attribute." if extract_additional else ""}
-- Be precise and only extract information that is explicitly stated or clearly implied
-
-Required Output Format:
-{json.dumps(output_format, indent=2)}
-        """
+    You are an intelligent product attribute extractor that works with ANY product type.
+
+    TASK:
+    1. Extract MANDATORY attributes: For each mandatory attribute, select the most appropriate value(s)
+    from the provided list. Choose the value(s) that best match the product description.
+    {additional_instruction}
+
+    Product Text:
+    {product_text}
+
+    Mandatory Attribute Lists (MUST select from these allowed values):
+    {mandatory_attr_text}
+
+    CRITICAL INSTRUCTIONS:
+    - Return ONLY valid JSON, nothing else
+    - No explanations, no markdown, no text before or after the JSON
+    - For mandatory attributes, choose the value(s) from the provided list that best match
+    - If a mandatory attribute cannot be determined from the product text, use "Not Specified"
+    - Prefer exact matches from the allowed values list over generic synonyms
+    - If multiple values are plausible, you MAY return more than one
+    {f"- For additional attributes: ONLY include attributes where you found actual values in the product text. DO NOT include attributes with 'Not Specified', 'None', 'N/A', or empty values. If you cannot find a value for an attribute, simply don't include that attribute." if extract_additional else ""}
+    - Be precise and only extract information that is explicitly stated or clearly implied
+
+    Required Output Format:
+    {json.dumps(output_format, indent=2)}
+            """
 
         payload = {
             "model": model,
@@ -577,6 +650,12 @@ Required Output Format:
             extracted_so_far = {}
             for attr in mandatory_attrs.keys():
                 allow_multiple = attr in multiple
+                
+                # DEBUG: Print per-attribute processing
+                print(f"\n>>> Processing attribute: {attr}")
+                print(f"    Allow multiple: {allow_multiple}")
+                print(f"    In multiple list: {attr in multiple}")
+                print(f"    Multiple list: {multiple}")
 
                 result = ProductAttributeService.normalize_against_product_text(
                     product_text=product_text,
@@ -610,6 +689,7 @@ Required Output Format:
                 str(e), mandatory_attrs, extract_additional
             )
 
+
     @staticmethod
     def extract_attributes_batch(
         products: List[Dict],

+ 18 - 1
attr_extraction/views.py

@@ -116,6 +116,15 @@ class BatchExtractProductAttributesView(APIView):
 
         validated_data = serializer.validated_data
         
+        # DEBUG: Print what we received
+        print("\n" + "="*80)
+        print("BATCH REQUEST - RECEIVED DATA")
+        print("="*80)
+        print(f"Raw request data keys: {request.data.keys()}")
+        print(f"Multiple field in request: {request.data.get('multiple')}")
+        print(f"Validated multiple field: {validated_data.get('multiple')}")
+        print("="*80 + "\n")
+        
         # Get batch-level settings
         product_list = validated_data.get("products", [])
         model = validated_data.get("model")
@@ -128,6 +137,10 @@ class BatchExtractProductAttributesView(APIView):
         use_adaptive_margin = validated_data.get("use_adaptive_margin", True)
         use_semantic_clustering = validated_data.get("use_semantic_clustering", True)
         
+        # DEBUG: Print extracted settings
+        print(f"Extracted multiple parameter: {multiple}")
+        print(f"Type: {type(multiple)}")
+        
         # Extract all item_ids to query the database efficiently
         item_ids = [p['item_id'] for p in product_list] 
         
@@ -189,6 +202,10 @@ class BatchExtractProductAttributesView(APIView):
                     ocr_text=ocr_text
                 )
 
+                # DEBUG: Print before extraction
+                print(f"\n>>> Extracting for product {item_id}")
+                print(f"    Passing multiple: {multiple}")
+
                 # Attribute Extraction with source tracking (returns array format)
                 extracted = ProductAttributeService.extract_attributes(
                     product_text=product_text,
@@ -196,7 +213,7 @@ class BatchExtractProductAttributesView(APIView):
                     source_map=source_map,
                     model=model,
                     extract_additional=extract_additional,
-                    multiple=multiple,
+                    multiple=multiple,  # Make sure this is passed!
                     threshold_abs=threshold_abs,
                     margin=margin,
                     use_dynamic_thresholds=use_dynamic_thresholds,