3 months ago · c3d9c6977f
--- a/attr_extraction/serializers.py
+++ b/attr_extraction/serializers.py
@@ -1,3 +1,9 @@
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+# # ==================== serializers.py ====================
			
 
				 # from rest_framework import serializers
			
 
				 
			
 
				 # class ProductInputSerializer(serializers.Serializer):
			
@@ -6,19 +12,36 @@
 
				 #     title = serializers.CharField(required=False, allow_blank=True, allow_null=True)
			
 
				 #     short_desc = serializers.CharField(required=False, allow_blank=True, allow_null=True)
			
 
				 #     long_desc = serializers.CharField(required=False, allow_blank=True, allow_null=True)
			
 
				+#     image_url = serializers.URLField(required=False, allow_blank=True, allow_null=True)
			
 
				+
			
 
				+# class MandatoryAttrsField(serializers.DictField):
			
 
				+#     """Custom DictField to validate mandatory_attrs structure."""
			
 
				+#     child = serializers.ListField(child=serializers.CharField())
			
 
				+
			
 
				+# class ProductBatchInputSerializer(serializers.Serializer):
			
 
				+#     """Serializer for an individual product input within the batch request."""
			
 
				+#     item_id = serializers.CharField(required=True)
			
 
				+#     mandatory_attrs = MandatoryAttrsField(
			
 
				+#         required=True,
			
 
				+#         help_text="A dictionary of attribute names and their possible values."
			
 
				+#     )
			
 
				+#     # You can also allow per-product model/flags if needed, but keeping it batch-level for simplicity here.
			
 
				 
			
 
				 
			
 
				 # class SingleProductRequestSerializer(serializers.Serializer):
			
 
				 #     """Serializer for single product extraction request."""
			
 
				-#     title = serializers.CharField(required=False, allow_blank=True, allow_null=True)
			
 
				-#     short_desc = serializers.CharField(required=False, allow_blank=True, allow_null=True)
			
 
				-#     long_desc = serializers.CharField(required=False, allow_blank=True, allow_null=True)
			
 
				+#     # title = serializers.CharField(required=False, allow_blank=True, allow_null=True)
			
 
				+#     # short_desc = serializers.CharField(required=False, allow_blank=True, allow_null=True)
			
 
				+#     # long_desc = serializers.CharField(required=False, allow_blank=True, allow_null=True)
			
 
				+#     # image_url = serializers.URLField(required=False, allow_blank=True, allow_null=True)
			
 
				+#     item_id = serializers.CharField(required=True)
			
 
				 #     mandatory_attrs = serializers.DictField(
			
 
				 #         child=serializers.ListField(child=serializers.CharField()),
			
 
				 #         required=True
			
 
				 #     )
			
 
				 #     model = serializers.CharField(required=False, default="llama-3.1-8b-instant")
			
 
				 #     extract_additional = serializers.BooleanField(required=False, default=True)
			
 
				+#     process_image = serializers.BooleanField(required=False, default=True)
			
 
				 
			
 
				 #     def validate_model(self, value):
			
 
				 #         from django.conf import settings
			
@@ -29,20 +52,19 @@
 
				 #         return value
			
 
				 
			
 
				 
			
 
				+
			
 
				 # class BatchProductRequestSerializer(serializers.Serializer):
			
 
				-#     """Serializer for batch product extraction request."""
			
 
				+#     """Serializer for batch product extraction request (with item-specific attributes)."""
			
 
				 #     products = serializers.ListField(
			
 
				-#         child=ProductInputSerializer(),
			
 
				+#         child=ProductBatchInputSerializer(), # <--- Changed
			
 
				 #         required=True,
			
 
				 #         min_length=1
			
 
				 #     )
			
 
				-#     mandatory_attrs = serializers.DictField(
			
 
				-#         child=serializers.ListField(child=serializers.CharField()),
			
 
				-#         required=True
			
 
				-#     )
			
 
				 #     model = serializers.CharField(required=False, default="llama-3.1-8b-instant")
			
 
				 #     extract_additional = serializers.BooleanField(required=False, default=True)
			
 
				-
			
 
				+#     process_image = serializers.BooleanField(required=False, default=True)
			
 
				+    
			
 
				+#     # ... validate_model method ...
			
 
				 #     def validate_model(self, value):
			
 
				 #         from django.conf import settings
			
 
				 #         if value not in settings.SUPPORTED_MODELS:
			
@@ -50,7 +72,8 @@
 
				 #                 f"Model must be one of {settings.SUPPORTED_MODELS}"
			
 
				 #             )
			
 
				 #         return value
			
 
				-
			
 
				+    
			
 
				+#     # ... validate_products method (updated to use products instead of item_ids) ...
			
 
				 #     def validate_products(self, value):
			
 
				 #         from django.conf import settings
			
 
				 #         max_size = getattr(settings, 'MAX_BATCH_SIZE', 100)
			
@@ -60,12 +83,18 @@
 
				 #             )
			
 
				 #         return value
			
 
				 
			
 
				+# class OCRResultSerializer(serializers.Serializer):
			
 
				+#     """Serializer for OCR results."""
			
 
				+#     detected_text = serializers.ListField(child=serializers.DictField())
			
 
				+#     extracted_attributes = serializers.DictField()
			
 
				+
			
 
				 
			
 
				 # class ProductAttributeResultSerializer(serializers.Serializer):
			
 
				 #     """Serializer for individual product extraction result."""
			
 
				 #     product_id = serializers.CharField(required=False)
			
 
				 #     mandatory = serializers.DictField()
			
 
				 #     additional = serializers.DictField(required=False)
			
 
				+#     ocr_results = OCRResultSerializer(required=False)
			
 
				 #     error = serializers.CharField(required=False)
			
 
				 #     raw_output = serializers.CharField(required=False)
			
 
				 
			
@@ -76,6 +105,101 @@
 
				 #     total_products = serializers.IntegerField()
			
 
				 #     successful = serializers.IntegerField()
			
 
				 #     failed = serializers.IntegerField()
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+# from rest_framework import serializers
			
 
				+# from .models import Product
			
 
				+
			
 
				+# class ProductSerializer(serializers.ModelSerializer):
			
 
				+#     product_type_details = serializers.SerializerMethodField()
			
 
				+    
			
 
				+#     class Meta:
			
 
				+#         model = Product
			
 
				+#         fields = [
			
 
				+#             'id',
			
 
				+#             'item_id',
			
 
				+#             'product_name',
			
 
				+#             'product_long_description',
			
 
				+#             'product_short_description',
			
 
				+#             'product_type',
			
 
				+#             'image_path',
			
 
				+#             'image',
			
 
				+#             'product_type_details',  # new field
			
 
				+#         ]
			
 
				+
			
 
				+#     def get_product_type_details(self, obj):
			
 
				+#         # Fetch ProductType object for this product
			
 
				+#         try:
			
 
				+#             product_type = ProductType.objects.get(name=obj.product_type)
			
 
				+#         except ProductType.DoesNotExist:
			
 
				+#             return []
			
 
				+
			
 
				+#         # Serialize its attributes
			
 
				+#         attributes = ProductAttribute.objects.filter(product_type=product_type)
			
 
				+#         return [
			
 
				+#             {
			
 
				+#                 "attribute_name": attr.name,
			
 
				+#                 "is_mandatory": "Yes" if attr.is_mandatory else "No",
			
 
				+#                 "possible_values": [pv.value for pv in attr.possible_values.all()]
			
 
				+#             }
			
 
				+#             for attr in attributes
			
 
				+#         ]
			
 
				+
			
 
				+
			
 
				+
			
 
				+# from rest_framework import serializers
			
 
				+# from .models import Product, ProductType, ProductAttribute, AttributePossibleValue
			
 
				+
			
 
				+# class AttributePossibleValueSerializer(serializers.ModelSerializer):
			
 
				+#     class Meta:
			
 
				+#         model = AttributePossibleValue
			
 
				+#         fields = ['value']
			
 
				+
			
 
				+# class ProductAttributeSerializer(serializers.ModelSerializer):
			
 
				+#     possible_values = AttributePossibleValueSerializer(many=True, read_only=True)
			
 
				+    
			
 
				+#     class Meta:
			
 
				+#         model = ProductAttribute
			
 
				+#         fields = ['name', 'is_mandatory', 'possible_values']
			
 
				+
			
 
				+# class ProductTypeSerializer(serializers.ModelSerializer):
			
 
				+#     attributes = ProductAttributeSerializer(many=True, read_only=True)
			
 
				+    
			
 
				+#     class Meta:
			
 
				+#         model = ProductType
			
 
				+#         fields = ['name', 'attributes']
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				 
			
 
				 
			
 
				 
			
@@ -86,6 +210,8 @@
 
				 
			
 
				 # ==================== serializers.py ====================
			
 
				 from rest_framework import serializers
			
 
				+from .models import Product, ProductType, ProductAttribute, AttributePossibleValue
			
 
				+
			
 
				 
			
 
				 class ProductInputSerializer(serializers.Serializer):
			
 
				     """Serializer for individual product input."""
			
@@ -95,10 +221,12 @@ class ProductInputSerializer(serializers.Serializer):
 
				     long_desc = serializers.CharField(required=False, allow_blank=True, allow_null=True)
			
 
				     image_url = serializers.URLField(required=False, allow_blank=True, allow_null=True)
			
 
				 
			
 
				+
			
 
				 class MandatoryAttrsField(serializers.DictField):
			
 
				     """Custom DictField to validate mandatory_attrs structure."""
			
 
				     child = serializers.ListField(child=serializers.CharField())
			
 
				 
			
 
				+
			
 
				 class ProductBatchInputSerializer(serializers.Serializer):
			
 
				     """Serializer for an individual product input within the batch request."""
			
 
				     item_id = serializers.CharField(required=True)
			
@@ -106,15 +234,10 @@ class ProductBatchInputSerializer(serializers.Serializer):
 
				         required=True,
			
 
				         help_text="A dictionary of attribute names and their possible values."
			
 
				     )
			
 
				-    # You can also allow per-product model/flags if needed, but keeping it batch-level for simplicity here.
			
 
				 
			
 
				 
			
 
				 class SingleProductRequestSerializer(serializers.Serializer):
			
 
				     """Serializer for single product extraction request."""
			
 
				-    # title = serializers.CharField(required=False, allow_blank=True, allow_null=True)
			
 
				-    # short_desc = serializers.CharField(required=False, allow_blank=True, allow_null=True)
			
 
				-    # long_desc = serializers.CharField(required=False, allow_blank=True, allow_null=True)
			
 
				-    # image_url = serializers.URLField(required=False, allow_blank=True, allow_null=True)
			
 
				     item_id = serializers.CharField(required=True)
			
 
				     mandatory_attrs = serializers.DictField(
			
 
				         child=serializers.ListField(child=serializers.CharField()),
			
@@ -123,6 +246,17 @@ class SingleProductRequestSerializer(serializers.Serializer):
 
				     model = serializers.CharField(required=False, default="llama-3.1-8b-instant")
			
 
				     extract_additional = serializers.BooleanField(required=False, default=True)
			
 
				     process_image = serializers.BooleanField(required=False, default=True)
			
 
				+    multiple = serializers.ListField(
			
 
				+        child=serializers.CharField(),
			
 
				+        required=False,
			
 
				+        default=list,
			
 
				+        help_text="List of attribute names that can have multiple values"
			
 
				+    )
			
 
				+    threshold_abs = serializers.FloatField(default=0.65, required=False)
			
 
				+    margin = serializers.FloatField(default=0.15, required=False)
			
 
				+    use_dynamic_thresholds = serializers.BooleanField(default=True, required=False)
			
 
				+    use_adaptive_margin = serializers.BooleanField(default=True, required=False)
			
 
				+    use_semantic_clustering = serializers.BooleanField(default=True, required=False)
			
 
				 
			
 
				     def validate_model(self, value):
			
 
				         from django.conf import settings
			
@@ -133,19 +267,28 @@ class SingleProductRequestSerializer(serializers.Serializer):
 
				         return value
			
 
				 
			
 
				 
			
 
				-
			
 
				 class BatchProductRequestSerializer(serializers.Serializer):
			
 
				     """Serializer for batch product extraction request (with item-specific attributes)."""
			
 
				     products = serializers.ListField(
			
 
				-        child=ProductBatchInputSerializer(), # <--- Changed
			
 
				+        child=ProductBatchInputSerializer(),
			
 
				         required=True,
			
 
				         min_length=1
			
 
				     )
			
 
				     model = serializers.CharField(required=False, default="llama-3.1-8b-instant")
			
 
				     extract_additional = serializers.BooleanField(required=False, default=True)
			
 
				     process_image = serializers.BooleanField(required=False, default=True)
			
 
				+    multiple = serializers.ListField(
			
 
				+        child=serializers.CharField(),
			
 
				+        required=False,
			
 
				+        default=list,
			
 
				+        help_text="List of attribute names that can have multiple values"
			
 
				+    )
			
 
				+    threshold_abs = serializers.FloatField(default=0.65, required=False)
			
 
				+    margin = serializers.FloatField(default=0.15, required=False)
			
 
				+    use_dynamic_thresholds = serializers.BooleanField(default=True, required=False)
			
 
				+    use_adaptive_margin = serializers.BooleanField(default=True, required=False)
			
 
				+    use_semantic_clustering = serializers.BooleanField(default=True, required=False)
			
 
				     
			
 
				-    # ... validate_model method ...
			
 
				     def validate_model(self, value):
			
 
				         from django.conf import settings
			
 
				         if value not in settings.SUPPORTED_MODELS:
			
@@ -154,7 +297,6 @@ class BatchProductRequestSerializer(serializers.Serializer):
 
				             )
			
 
				         return value
			
 
				     
			
 
				-    # ... validate_products method (updated to use products instead of item_ids) ...
			
 
				     def validate_products(self, value):
			
 
				         from django.conf import settings
			
 
				         max_size = getattr(settings, 'MAX_BATCH_SIZE', 100)
			
@@ -164,6 +306,7 @@ class BatchProductRequestSerializer(serializers.Serializer):
 
				             )
			
 
				         return value
			
 
				 
			
 
				+
			
 
				 class OCRResultSerializer(serializers.Serializer):
			
 
				     """Serializer for OCR results."""
			
 
				     detected_text = serializers.ListField(child=serializers.DictField())
			
@@ -188,12 +331,8 @@ class BatchProductResponseSerializer(serializers.Serializer):
 
				     failed = serializers.IntegerField()
			
 
				 
			
 
				 
			
 
				-
			
 
				-
			
 
				-from rest_framework import serializers
			
 
				-from .models import Product
			
 
				-
			
 
				 class ProductSerializer(serializers.ModelSerializer):
			
 
				+    """Serializer for Product model with product type details."""
			
 
				     product_type_details = serializers.SerializerMethodField()
			
 
				     
			
 
				     class Meta:
			
@@ -207,11 +346,11 @@ class ProductSerializer(serializers.ModelSerializer):
 
				             'product_type',
			
 
				             'image_path',
			
 
				             'image',
			
 
				-            'product_type_details',  # new field
			
 
				+            'product_type_details',
			
 
				         ]
			
 
				 
			
 
				     def get_product_type_details(self, obj):
			
 
				-        # Fetch ProductType object for this product
			
 
				+        """Fetch ProductType object and its attributes for this product."""
			
 
				         try:
			
 
				             product_type = ProductType.objects.get(name=obj.product_type)
			
 
				         except ProductType.DoesNotExist:
			
@@ -229,25 +368,34 @@ class ProductSerializer(serializers.ModelSerializer):
 
				         ]
			
 
				 
			
 
				 
			
 
				-
			
 
				-from rest_framework import serializers
			
 
				-from .models import Product, ProductType, ProductAttribute, AttributePossibleValue
			
 
				-
			
 
				 class AttributePossibleValueSerializer(serializers.ModelSerializer):
			
 
				+    """Serializer for AttributePossibleValue model."""
			
 
				     class Meta:
			
 
				         model = AttributePossibleValue
			
 
				         fields = ['value']
			
 
				 
			
 
				+
			
 
				 class ProductAttributeSerializer(serializers.ModelSerializer):
			
 
				+    """Serializer for ProductAttribute model with possible values."""
			
 
				     possible_values = AttributePossibleValueSerializer(many=True, read_only=True)
			
 
				     
			
 
				     class Meta:
			
 
				         model = ProductAttribute
			
 
				         fields = ['name', 'is_mandatory', 'possible_values']
			
 
				 
			
 
				+
			
 
				 class ProductTypeSerializer(serializers.ModelSerializer):
			
 
				+    """Serializer for ProductType model with attributes."""
			
 
				     attributes = ProductAttributeSerializer(many=True, read_only=True)
			
 
				     
			
 
				     class Meta:
			
 
				         model = ProductType
			
 
				         fields = ['name', 'attributes']
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+        
			
--- a/attr_extraction/services.py
+++ b/attr_extraction/services.py
@@ -366,17 +366,32 @@ If an attribute is not present, do not include it in the response.
 
				             scores.sort(key=lambda x: x[1], reverse=True)
			
 
				             best_val, best_score = scores[0]
			
 
				 
			
 
				+            # DEBUG: Print scores
			
 
				+            print(f"\n{'='*80}")
			
 
				+            print(f"Attribute: {attr}")
			
 
				+            print(f"{'='*80}")
			
 
				+            print(f"Top 5 Scores:")
			
 
				+            for i, (val, sc) in enumerate(scores[:5]):
			
 
				+                print(f"  {i+1}. {val}: {sc:.4f}")
			
 
				+            print(f"\nBest: {best_val} (score: {best_score:.4f})")
			
 
				+            print(f"Base Threshold: {threshold_abs}")
			
 
				+            print(f"Base Margin: {margin}")
			
 
				+
			
 
				             # Calculate adaptive margin if enabled
			
 
				             effective_margin = margin
			
 
				             if allow_multiple and use_adaptive_margin:
			
 
				                 effective_margin = ProductAttributeService.get_adaptive_margin(scores, margin)
			
 
				+                print(f"Adaptive Margin: {effective_margin}")
			
 
				 
			
 
				             if not allow_multiple:
			
 
				                 source = ProductAttributeService.find_value_source(best_val, source_map)
			
 
				                 extracted[attr] = [{"value": best_val, "source": source}]
			
 
				+                print(f"Single value mode - Selected: {best_val}")
			
 
				             else:
			
 
				+                print(f"\nMultiple value mode enabled")
			
 
				                 candidates = [best_val]
			
 
				                 use_base_threshold = best_score >= threshold_abs
			
 
				+                print(f"Use base threshold: {use_base_threshold} (best_score >= {threshold_abs})")
			
 
				 
			
 
				                 # Get semantic clusters if enabled
			
 
				                 clusters = []
			
@@ -385,7 +400,12 @@ If an attribute is not present, do not include it in the response.
 
				                         allowed_values, scores, cluster_threshold=0.4
			
 
				                     )
			
 
				                     best_cluster = next((c for c in clusters if best_val in c), [best_val])
			
 
				+                    print(f"\nSemantic Clusters:")
			
 
				+                    for idx, cluster in enumerate(clusters):
			
 
				+                        marker = " <- BEST" if best_val in cluster else ""
			
 
				+                        print(f"  Cluster {idx+1}: {cluster}{marker}")
			
 
				 
			
 
				+                print(f"\nEvaluating additional candidates:")
			
 
				                 for val, sc in scores[1:]:
			
 
				                     # Calculate dynamic threshold for this value
			
 
				                     if use_dynamic_thresholds and extracted_attrs:
			
@@ -404,28 +424,68 @@ If an attribute is not present, do not include it in the response.
 
				                     if use_semantic_clustering and clusters:
			
 
				                         in_cluster = any(best_val in c and val in c for c in clusters)
			
 
				 
			
 
				+                    # DEBUG: Print candidate evaluation
			
 
				+                    print(f"\n  Candidate: {val}")
			
 
				+                    print(f"    Score: {sc:.4f}")
			
 
				+                    print(f"    Margin diff: {best_score - sc:.4f} (within_margin: {within_margin})")
			
 
				+                    print(f"    Dynamic threshold: {dynamic_thresh:.4f} (above_threshold: {above_threshold})")
			
 
				+                    print(f"    In cluster with best: {in_cluster}")
			
 
				+
			
 
				+                    # MODIFIED LOGIC: stricter than before - cluster/margin matches must also meet a minimum score ratio
			
 
				+                    # BALANCED LOGIC: Smart multi-value extraction
			
 
				+                    include_candidate = False
			
 
				+                    reason = ""
			
 
				+
			
 
				+                    # Calculate score ratio (how close to best score)
			
 
				+                    score_ratio = sc / best_score if best_score > 0 else 0
			
 
				+
			
 
				                     if use_base_threshold:
			
 
				-                        # Best score is good, require threshold OR (cluster + margin)
			
 
				+                        # Best score is good (>= threshold), be selective
			
 
				                         if above_threshold and within_margin:
			
 
				-                            candidates.append(val)
			
 
				-                        elif in_cluster and within_margin:
			
 
				-                            candidates.append(val)
			
 
				+                            include_candidate = True
			
 
				+                            reason = "above threshold AND within margin"
			
 
				+                        elif in_cluster and within_margin and score_ratio >= 0.75:
			
 
				+                            # Only include cluster members if they're close in score
			
 
				+                            include_candidate = True
			
 
				+                            reason = "in cluster AND within margin with good score ratio"
			
 
				                     else:
			
 
				-                        # Best score is low, use margin OR cluster logic
			
 
				-                        if within_margin:
			
 
				-                            candidates.append(val)
			
 
				-                        elif in_cluster and (best_score - sc) <= effective_margin * 2.0:
			
 
				-                            # Extended margin for cluster members
			
 
				-                            candidates.append(val)
			
 
				+                        # Best score is low (< threshold), be more careful
			
 
				+                        # Only include candidates that are very close to the best score
			
 
				+                        if within_margin and score_ratio >= 0.80:
			
 
				+                            # Must be at least 80% of best score
			
 
				+                            include_candidate = True
			
 
				+                            reason = "within margin with strong score ratio"
			
 
				+                        elif in_cluster and within_margin and score_ratio >= 0.85:
			
 
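				+                            # Intent: cluster members need an even higher ratio when best score is low. NOTE(review): unreachable as written - any value passing this test already passed the "within_margin and score_ratio >= 0.80" check above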
			
 
				+                            include_candidate = True
			
 
				+                            reason = "in cluster with tight margin and high score ratio"
			
 
				+
			
 
				+                    # Additional filter: Never include "Not Specified" if we have better options
			
 
				+                    if include_candidate and val.lower() in ["not specified", "not_specified", "unspecified"]:
			
 
				+                        # Keep "Not Specified" only if no other candidate has been added yet AND its score is at least 95% of the best score
			
 
				+                        if len(candidates) > 1 or (sc < best_score * 0.95):
			
 
				+                            include_candidate = False
			
 
				+                            reason = "excluded: 'Not Specified' with better alternatives"
			
 
				+
			
 
				+                    if include_candidate:
			
 
				+                        candidates.append(val)
			
 
				+                        print(f"    ✓ INCLUDED - Reason: {reason}")
			
 
				+                    else:
			
 
				+                        print(f"    ✗ EXCLUDED")
			
 
				 
			
 
				                 # Map each candidate to its source and create array format
			
 
				                 extracted[attr] = []
			
 
				+                print(f"\nFinal candidates for {attr}: {candidates}")
			
 
				                 for candidate in candidates:
			
 
				                     source = ProductAttributeService.find_value_source(candidate, source_map)
			
 
				                     extracted[attr].append({"value": candidate, "source": source})
			
 
				+                    print(f"  - {candidate} (source: {source})")
			
 
				+
			
 
				+            print(f"{'='*80}\n")
			
 
				 
			
 
				         return extracted
			
 
				 
			
 
				+
			
 
				     @staticmethod
			
 
				     def extract_attributes(
			
 
				         product_text: str,
			
@@ -454,6 +514,19 @@ If an attribute is not present, do not include it in the response.
 
				         if source_map is None:
			
 
				             source_map = {}
			
 
				 
			
 
				+        # DEBUG: Print what we received
			
 
				+        print("\n" + "="*80)
			
 
				+        print("EXTRACT ATTRIBUTES - INPUT PARAMETERS")
			
 
				+        print("="*80)
			
 
				+        print(f"Product text length: {len(product_text)}")
			
 
				+        print(f"Mandatory attrs: {list(mandatory_attrs.keys())}")
			
 
				+        print(f"Multiple mode for: {multiple}")
			
 
				+        print(f"Threshold: {threshold_abs}, Margin: {margin}")
			
 
				+        print(f"Dynamic thresholds: {use_dynamic_thresholds}")
			
 
				+        print(f"Adaptive margin: {use_adaptive_margin}")
			
 
				+        print(f"Semantic clustering: {use_semantic_clustering}")
			
 
				+        print("="*80 + "\n")
			
 
				+
			
 
				         # Check if product text is empty or minimal
			
 
				         if not product_text or product_text == "No product information available":
			
 
				             return ProductAttributeService._create_error_response(
			
@@ -471,12 +544,12 @@ If an attribute is not present, do not include it in the response.
 
				         additional_instruction = ""
			
 
				         if extract_additional:
			
 
				             additional_instruction = """
			
 
				-2. Extract ADDITIONAL attributes: Identify any other relevant attributes from the product text 
			
 
				-   that are NOT in the mandatory list. Only include attributes where you can find actual values
			
 
				-   in the product text. Do NOT include attributes with "Not Specified" or empty values.
			
 
				-   
			
 
				-   Examples of attributes to look for (only if present): Brand, Material, Size, Color, Dimensions,
			
 
				-   Weight, Features, Style, Theme, Pattern, Finish, Care Instructions, etc."""
			
 
				+    2. Extract ADDITIONAL attributes: Identify any other relevant attributes from the product text 
			
 
				+    that are NOT in the mandatory list. Only include attributes where you can find actual values
			
 
				+    in the product text. Do NOT include attributes with "Not Specified" or empty values.
			
 
				+    
			
 
				+    Examples of attributes to look for (only if present): Brand, Material, Size, Color, Dimensions,
			
 
				+    Weight, Features, Style, Theme, Pattern, Finish, Care Instructions, etc."""
			
 
				 
			
 
				         output_format = {
			
 
				             "mandatory": {attr: "value or list of values" for attr in mandatory_attrs.keys()},
			
@@ -490,32 +563,32 @@ If an attribute is not present, do not include it in the response.
 
				             output_format["additional"]["_note"] = "Only include attributes with actual values found in text"
			
 
				 
			
 
				         prompt = f"""
			
 
				-You are an intelligent product attribute extractor that works with ANY product type.
			
 
				-
			
 
				-TASK:
			
 
				-1. Extract MANDATORY attributes: For each mandatory attribute, select the most appropriate value(s)
			
 
				-   from the provided list. Choose the value(s) that best match the product description.
			
 
				-{additional_instruction}
			
 
				-
			
 
				-Product Text:
			
 
				-{product_text}
			
 
				-
			
 
				-Mandatory Attribute Lists (MUST select from these allowed values):
			
 
				-{mandatory_attr_text}
			
 
				-
			
 
				-CRITICAL INSTRUCTIONS:
			
 
				-- Return ONLY valid JSON, nothing else
			
 
				-- No explanations, no markdown, no text before or after the JSON
			
 
				-- For mandatory attributes, choose the value(s) from the provided list that best match
			
 
				-- If a mandatory attribute cannot be determined from the product text, use "Not Specified"
			
 
				-- Prefer exact matches from the allowed values list over generic synonyms
			
 
				-- If multiple values are plausible, you MAY return more than one
			
 
				-{f"- For additional attributes: ONLY include attributes where you found actual values in the product text. DO NOT include attributes with 'Not Specified', 'None', 'N/A', or empty values. If you cannot find a value for an attribute, simply don't include that attribute." if extract_additional else ""}
			
 
				-- Be precise and only extract information that is explicitly stated or clearly implied
			
 
				-
			
 
				-Required Output Format:
			
 
				-{json.dumps(output_format, indent=2)}
			
 
				-        """
			
 
				+    You are an intelligent product attribute extractor that works with ANY product type.
			
 
				+
			
 
				+    TASK:
			
 
				+    1. Extract MANDATORY attributes: For each mandatory attribute, select the most appropriate value(s)
			
 
				+    from the provided list. Choose the value(s) that best match the product description.
			
 
				+    {additional_instruction}
			
 
				+
			
 
				+    Product Text:
			
 
				+    {product_text}
			
 
				+
			
 
				+    Mandatory Attribute Lists (MUST select from these allowed values):
			
 
				+    {mandatory_attr_text}
			
 
				+
			
 
				+    CRITICAL INSTRUCTIONS:
			
 
				+    - Return ONLY valid JSON, nothing else
			
 
				+    - No explanations, no markdown, no text before or after the JSON
			
 
				+    - For mandatory attributes, choose the value(s) from the provided list that best match
			
 
				+    - If a mandatory attribute cannot be determined from the product text, use "Not Specified"
			
 
				+    - Prefer exact matches from the allowed values list over generic synonyms
			
 
				+    - If multiple values are plausible, you MAY return more than one
			
 
				+    {f"- For additional attributes: ONLY include attributes where you found actual values in the product text. DO NOT include attributes with 'Not Specified', 'None', 'N/A', or empty values. If you cannot find a value for an attribute, simply don't include that attribute." if extract_additional else ""}
			
 
				+    - Be precise and only extract information that is explicitly stated or clearly implied
			
 
				+
			
 
				+    Required Output Format:
			
 
				+    {json.dumps(output_format, indent=2)}
			
 
				+            """
			
 
				 
			
 
				         payload = {
			
 
				             "model": model,
			
@@ -577,6 +650,12 @@ Required Output Format:
 
				             extracted_so_far = {}
			
 
				             for attr in mandatory_attrs.keys():
			
 
				                 allow_multiple = attr in multiple
			
 
				+                
			
 
				+                # DEBUG: Print per-attribute processing
			
 
				+                print(f"\n>>> Processing attribute: {attr}")
			
 
				+                print(f"    Allow multiple: {allow_multiple}")
			
 
				+                print(f"    In multiple list: {attr in multiple}")
			
 
				+                print(f"    Multiple list: {multiple}")
			
 
				 
			
 
				                 result = ProductAttributeService.normalize_against_product_text(
			
 
				                     product_text=product_text,
			
@@ -610,6 +689,7 @@ Required Output Format:
 
				                 str(e), mandatory_attrs, extract_additional
			
 
				             )
			
 
				 
			
 
				+
			
 
				     @staticmethod
			
 
				     def extract_attributes_batch(
			
 
				         products: List[Dict],
			
--- a/attr_extraction/views.py
+++ b/attr_extraction/views.py
@@ -116,6 +116,15 @@ class BatchExtractProductAttributesView(APIView):
 
				 
			
 
				         validated_data = serializer.validated_data
			
 
				         
			
 
				+        # DEBUG: Print what we received
			
 
				+        print("\n" + "="*80)
			
 
				+        print("BATCH REQUEST - RECEIVED DATA")
			
 
				+        print("="*80)
			
 
				+        print(f"Raw request data keys: {request.data.keys()}")
			
 
				+        print(f"Multiple field in request: {request.data.get('multiple')}")
			
 
				+        print(f"Validated multiple field: {validated_data.get('multiple')}")
			
 
				+        print("="*80 + "\n")
			
 
				+        
			
 
				         # Get batch-level settings
			
 
				         product_list = validated_data.get("products", [])
			
 
				         model = validated_data.get("model")
			
@@ -128,6 +137,10 @@ class BatchExtractProductAttributesView(APIView):
 
				         use_adaptive_margin = validated_data.get("use_adaptive_margin", True)
			
 
				         use_semantic_clustering = validated_data.get("use_semantic_clustering", True)
			
 
				         
			
 
				+        # DEBUG: Print extracted settings
			
 
				+        print(f"Extracted multiple parameter: {multiple}")
			
 
				+        print(f"Type: {type(multiple)}")
			
 
				+        
			
 
				         # Extract all item_ids to query the database efficiently
			
 
				         item_ids = [p['item_id'] for p in product_list] 
			
 
				         
			
@@ -189,6 +202,10 @@ class BatchExtractProductAttributesView(APIView):
 
				                     ocr_text=ocr_text
			
 
				                 )
			
 
				 
			
 
				+                # DEBUG: Print before extraction
			
 
				+                print(f"\n>>> Extracting for product {item_id}")
			
 
				+                print(f"    Passing multiple: {multiple}")
			
 
				+
			
 
				                 # Attribute Extraction with source tracking (returns array format)
			
 
				                 extracted = ProductAttributeService.extract_attributes(
			
 
				                     product_text=product_text,
			
@@ -196,7 +213,7 @@ class BatchExtractProductAttributesView(APIView):
 
				                     source_map=source_map,
			
 
				                     model=model,
			
 
				                     extract_additional=extract_additional,
			
 
				-                    multiple=multiple,
			
 
				+                    multiple=multiple,  # Make sure this is passed!
			
 
				                     threshold_abs=threshold_abs,
			
 
				                     margin=margin,
			
 
				                     use_dynamic_thresholds=use_dynamic_thresholds,