há 3 meses atrás · ae7867812b
--- a/attr_extraction/__init__.py
+++ b/attr_extraction/__init__.py
--- a/attr_extraction/admin.py
+++ b/attr_extraction/admin.py
@@ -0,0 +1,3 @@
 
															+from django.contrib import admin
														
 
															+
														
 
															+# Register your models here.
														
--- a/attr_extraction/apps.py
+++ b/attr_extraction/apps.py
@@ -0,0 +1,6 @@
 
															+from django.apps import AppConfig
														
 
															+
														
 
															+
														
 
															+class AttrExtractionConfig(AppConfig):
														
 
															+    default_auto_field = 'django.db.models.BigAutoField'
														
 
															+    name = 'attr_extraction'
														
--- a/attr_extraction/migrations/0001_initial.py
+++ b/attr_extraction/migrations/0001_initial.py
@@ -0,0 +1,27 @@
 
															+# Generated by Django 5.2.7 on 2025-10-17 10:21
														
 
															+
														
 
															+from django.db import migrations, models
														
 
															+
														
 
															+
														
 
															+class Migration(migrations.Migration):
														
 
															+
														
 
															+    initial = True
														
 
															+
														
 
															+    dependencies = [
														
 
															+    ]
														
 
															+
														
 
															+    operations = [
														
 
															+        migrations.CreateModel(
														
 
															+            name='Product',
														
 
															+            fields=[
														
 
															+                ('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
														
 
															+                ('item_id', models.CharField(max_length=100, unique=True)),
														
 
															+                ('product_name', models.CharField(max_length=255)),
														
 
															+                ('product_long_description', models.TextField(blank=True, null=True)),
														
 
															+                ('product_short_description', models.TextField(blank=True, null=True)),
														
 
															+                ('product_type', models.CharField(blank=True, max_length=100, null=True)),
														
 
															+                ('image_path', models.CharField(blank=True, max_length=500, null=True)),
														
 
															+                ('image', models.ImageField(blank=True, null=True, upload_to='products/')),
														
 
															+            ],
														
 
															+        ),
														
 
															+    ]
														
--- a/attr_extraction/migrations/__init__.py
+++ b/attr_extraction/migrations/__init__.py
--- a/attr_extraction/models.py
+++ b/attr_extraction/models.py
@@ -0,0 +1,16 @@
 
															+from django.db import models
														
 
															+
														
 
															+class Product(models.Model):
														
 
															+    """
														
 
															+    Stores product details
														
 
															+    """
														
 
															+    item_id = models.CharField(max_length=100, unique=True)
														
 
															+    product_name = models.CharField(max_length=255)
														
 
															+    product_long_description = models.TextField(blank=True, null=True)
														
 
															+    product_short_description = models.TextField(blank=True, null=True)
														
 
															+    product_type = models.CharField(max_length=100, blank=True, null=True)
														
 
															+    image_path = models.CharField(max_length=500, blank=True, null=True)
														
 
															+    image = models.ImageField(upload_to='products/', blank=True, null=True)
														
 
															+
														
 
															+    def __str__(self):
														
 
															+        return f"{self.product_name} ({self.item_id})"
														
--- a/attr_extraction/ocr_service.py
+++ b/attr_extraction/ocr_service.py
@@ -0,0 +1,151 @@
 
															+# ==================== ocr_service.py ====================
														
 
															+import cv2
														
 
															+import easyocr
														
 
															+import numpy as np
														
 
															+import re
														
 
															+import requests
														
 
															+from io import BytesIO
														
 
															+from PIL import Image
														
 
															+from typing import List, Tuple, Dict, Optional
														
 
															+import logging
														
 
															+
														
 
															+logger = logging.getLogger(__name__)
														
 
															+
														
 
															+
														
 
															+class OCRService:
														
 
															+    """Service for extracting text from product images using OCR."""
														
 
															+    
														
 
															+    def __init__(self):
														
 
															+        self.reader = None
														
 
															+    
														
 
															+    def _get_reader(self):
														
 
															+        """Lazy load EasyOCR reader."""
														
 
															+        if self.reader is None:
														
 
															+            self.reader = easyocr.Reader(['en'], gpu=False)
														
 
															+        return self.reader
														
 
															+    
														
 
															+    def download_image(self, image_url: str) -> Optional[np.ndarray]:
														
 
															+        """Download image from URL and convert to OpenCV format."""
														
 
															+        try:
														
 
															+            response = requests.get(image_url, timeout=10)
														
 
															+            response.raise_for_status()
														
 
															+            
														
 
															+            # Convert to PIL Image then to OpenCV format
														
 
															+            pil_image = Image.open(BytesIO(response.content))
														
 
															+            image = cv2.cvtColor(np.array(pil_image), cv2.COLOR_RGB2BGR)
														
 
															+            return image
														
 
															+        except Exception as e:
														
 
															+            logger.error(f"Error downloading image from {image_url}: {str(e)}")
														
 
															+            return None
														
 
															+    
														
 
															+    def preprocess_horizontal(self, image: np.ndarray) -> np.ndarray:
														
 
															+        """Preprocess image for horizontal text."""
														
 
															+        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
														
 
															+        enhanced = cv2.GaussianBlur(gray, (5, 5), 0)
														
 
															+        _, binary = cv2.threshold(enhanced, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
														
 
															+        return binary
														
 
															+    
														
 
															+    def preprocess_vertical(self, image: np.ndarray) -> np.ndarray:
														
 
															+        """Preprocess image for vertical text."""
														
 
															+        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
														
 
															+        enhanced = cv2.equalizeHist(gray)
														
 
															+        thresh = cv2.adaptiveThreshold(
														
 
															+            enhanced, 255, cv2.ADAPTIVE_THRESH_MEAN_C, cv2.THRESH_BINARY_INV, 15, 10
														
 
															+        )
														
 
															+        return thresh
														
 
															+    
														
 
															+    def detect_text_regions(self, image: np.ndarray, preprocess_func) -> List[Tuple]:
														
 
															+        """Detect text regions using contours."""
														
 
															+        processed = preprocess_func(image)
														
 
															+        contours, _ = cv2.findContours(processed, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
														
 
															+        text_regions = []
														
 
															+        for contour in contours:
														
 
															+            x, y, w, h = cv2.boundingRect(contour)
														
 
															+            if w > 30 and h > 30:  # Filter small regions
														
 
															+                aspect_ratio = h / w
														
 
															+                text_regions.append((x, y, w, h, aspect_ratio))
														
 
															+        return text_regions
														
 
															+    
														
 
															+    def classify_and_extract_text(self, image: np.ndarray, regions: List[Tuple]) -> List[Tuple]:
														
 
															+        """Classify regions as horizontal or vertical and extract text."""
														
 
															+        reader = self._get_reader()
														
 
															+        all_detected_text = []
														
 
															+        
														
 
															+        for (x, y, w, h, aspect_ratio) in regions:
														
 
															+            roi = image[y:y + h, x:x + w]
														
 
															+            if aspect_ratio > 1.5:  # Vertical text
														
 
															+                roi = cv2.rotate(roi, cv2.ROTATE_90_CLOCKWISE)
														
 
															+            
														
 
															+            results = reader.readtext(roi, detail=1)
														
 
															+            for _, text, confidence in results:
														
 
															+                all_detected_text.append((text, confidence))
														
 
															+        
														
 
															+        return all_detected_text
														
 
															+    
														
 
															+    def clean_ocr_output(self, ocr_results: List[Tuple], confidence_threshold: float = 0.40) -> List[Tuple]:
														
 
															+        """Clean OCR results by removing unwanted characters and low-confidence detections."""
														
 
															+        cleaned_results = []
														
 
															+        for text, confidence in ocr_results:
														
 
															+            if confidence < confidence_threshold:
														
 
															+                continue
														
 
															+            
														
 
															+            # Remove unwanted characters using regex
														
 
															+            cleaned_text = re.sub(r"[^A-Za-z0-9\s\.\,\(\)\-\%\/]", "", text)
														
 
															+            cleaned_text = re.sub(r"\s+", " ", cleaned_text).strip()
														
 
															+            
														
 
															+            # Remove unwanted numeric characters like single digits
														
 
															+            if len(cleaned_text) == 1 and cleaned_text.isdigit():
														
 
															+                continue
														
 
															+            
														
 
															+            if any(char.isdigit() for char in cleaned_text) and len(cleaned_text) < 2:
														
 
															+                continue
														
 
															+            
														
 
															+            if len(cleaned_text.strip()) > 0:
														
 
															+                cleaned_results.append((cleaned_text.strip(), confidence))
														
 
															+        
														
 
															+        return cleaned_results
														
 
															+    
														
 
															+    def process_image(self, image_url: str) -> Dict:
														
 
															+        """Main method to process image and extract text."""
														
 
															+        try:
														
 
															+            # Download image
														
 
															+            image = self.download_image(image_url)
														
 
															+            if image is None:
														
 
															+                return {
														
 
															+                    "detected_text": [],
														
 
															+                    "extracted_attributes": {},
														
 
															+                    "error": "Failed to download image"
														
 
															+                }
														
 
															+            
														
 
															+            # Detect and process horizontal text
														
 
															+            horizontal_regions = self.detect_text_regions(image, self.preprocess_horizontal)
														
 
															+            horizontal_text = self.classify_and_extract_text(image, horizontal_regions)
														
 
															+            
														
 
															+            # Detect and process vertical text
														
 
															+            vertical_regions = self.detect_text_regions(image, self.preprocess_vertical)
														
 
															+            vertical_text = self.classify_and_extract_text(image, vertical_regions)
														
 
															+            
														
 
															+            # Combine results
														
 
															+            all_text = horizontal_text + vertical_text
														
 
															+            
														
 
															+            # Clean results
														
 
															+            cleaned_results = self.clean_ocr_output(all_text, confidence_threshold=0.40)
														
 
															+            
														
 
															+            # Format for response
														
 
															+            detected_text = [
														
 
															+                {"text": text, "confidence": float(confidence)} 
														
 
															+                for text, confidence in cleaned_results
														
 
															+            ]
														
 
															+            
														
 
															+            return {
														
 
															+                "detected_text": detected_text,
														
 
															+                "extracted_attributes": {}
														
 
															+            }
														
 
															+            
														
 
															+        except Exception as e:
														
 
															+            logger.error(f"Error processing image: {str(e)}")
														
 
															+            return {
														
 
															+                "detected_text": [],
														
 
															+                "extracted_attributes": {},
														
 
															+                "error": str(e)
														
 
															+            }
														
--- a/attr_extraction/serializers.py
+++ b/attr_extraction/serializers.py
@@ -0,0 +1,196 @@
 
															+# from rest_framework import serializers
														
 
															+
														
 
															+# class ProductInputSerializer(serializers.Serializer):
														
 
															+#     """Serializer for individual product input."""
														
 
															+#     product_id = serializers.CharField(required=False, allow_blank=True, allow_null=True)
														
 
															+#     title = serializers.CharField(required=False, allow_blank=True, allow_null=True)
														
 
															+#     short_desc = serializers.CharField(required=False, allow_blank=True, allow_null=True)
														
 
															+#     long_desc = serializers.CharField(required=False, allow_blank=True, allow_null=True)
														
 
															+
														
 
															+
														
 
															+# class SingleProductRequestSerializer(serializers.Serializer):
														
 
															+#     """Serializer for single product extraction request."""
														
 
															+#     title = serializers.CharField(required=False, allow_blank=True, allow_null=True)
														
 
															+#     short_desc = serializers.CharField(required=False, allow_blank=True, allow_null=True)
														
 
															+#     long_desc = serializers.CharField(required=False, allow_blank=True, allow_null=True)
														
 
															+#     mandatory_attrs = serializers.DictField(
														
 
															+#         child=serializers.ListField(child=serializers.CharField()),
														
 
															+#         required=True
														
 
															+#     )
														
 
															+#     model = serializers.CharField(required=False, default="llama-3.1-8b-instant")
														
 
															+#     extract_additional = serializers.BooleanField(required=False, default=True)
														
 
															+
														
 
															+#     def validate_model(self, value):
														
 
															+#         from django.conf import settings
														
 
															+#         if value not in settings.SUPPORTED_MODELS:
														
 
															+#             raise serializers.ValidationError(
														
 
															+#                 f"Model must be one of {settings.SUPPORTED_MODELS}"
														
 
															+#             )
														
 
															+#         return value
														
 
															+
														
 
															+
														
 
															+# class BatchProductRequestSerializer(serializers.Serializer):
														
 
															+#     """Serializer for batch product extraction request."""
														
 
															+#     products = serializers.ListField(
														
 
															+#         child=ProductInputSerializer(),
														
 
															+#         required=True,
														
 
															+#         min_length=1
														
 
															+#     )
														
 
															+#     mandatory_attrs = serializers.DictField(
														
 
															+#         child=serializers.ListField(child=serializers.CharField()),
														
 
															+#         required=True
														
 
															+#     )
														
 
															+#     model = serializers.CharField(required=False, default="llama-3.1-8b-instant")
														
 
															+#     extract_additional = serializers.BooleanField(required=False, default=True)
														
 
															+
														
 
															+#     def validate_model(self, value):
														
 
															+#         from django.conf import settings
														
 
															+#         if value not in settings.SUPPORTED_MODELS:
														
 
															+#             raise serializers.ValidationError(
														
 
															+#                 f"Model must be one of {settings.SUPPORTED_MODELS}"
														
 
															+#             )
														
 
															+#         return value
														
 
															+
														
 
															+#     def validate_products(self, value):
														
 
															+#         from django.conf import settings
														
 
															+#         max_size = getattr(settings, 'MAX_BATCH_SIZE', 100)
														
 
															+#         if len(value) > max_size:
														
 
															+#             raise serializers.ValidationError(
														
 
															+#                 f"Batch size cannot exceed {max_size} products"
														
 
															+#             )
														
 
															+#         return value
														
 
															+
														
 
															+
														
 
															+# class ProductAttributeResultSerializer(serializers.Serializer):
														
 
															+#     """Serializer for individual product extraction result."""
														
 
															+#     product_id = serializers.CharField(required=False)
														
 
															+#     mandatory = serializers.DictField()
														
 
															+#     additional = serializers.DictField(required=False)
														
 
															+#     error = serializers.CharField(required=False)
														
 
															+#     raw_output = serializers.CharField(required=False)
														
 
															+
														
 
															+
														
 
															+# class BatchProductResponseSerializer(serializers.Serializer):
														
 
															+#     """Serializer for batch extraction response."""
														
 
															+#     results = serializers.ListField(child=ProductAttributeResultSerializer())
														
 
															+#     total_products = serializers.IntegerField()
														
 
															+#     successful = serializers.IntegerField()
														
 
															+#     failed = serializers.IntegerField()
														
 
															+
														
 
															+
														
 
															+
														
 
															+
														
 
															+
														
 
															+
														
 
															+
														
 
															+
														
 
															+# ==================== serializers.py ====================
														
 
															+from rest_framework import serializers
														
 
															+
														
 
															+class ProductInputSerializer(serializers.Serializer):
														
 
															+    """Serializer for individual product input."""
														
 
															+    product_id = serializers.CharField(required=False, allow_blank=True, allow_null=True)
														
 
															+    title = serializers.CharField(required=False, allow_blank=True, allow_null=True)
														
 
															+    short_desc = serializers.CharField(required=False, allow_blank=True, allow_null=True)
														
 
															+    long_desc = serializers.CharField(required=False, allow_blank=True, allow_null=True)
														
 
															+    image_url = serializers.URLField(required=False, allow_blank=True, allow_null=True)
														
 
															+
														
 
															+
														
 
															+class SingleProductRequestSerializer(serializers.Serializer):
														
 
															+    """Serializer for single product extraction request."""
														
 
															+    title = serializers.CharField(required=False, allow_blank=True, allow_null=True)
														
 
															+    short_desc = serializers.CharField(required=False, allow_blank=True, allow_null=True)
														
 
															+    long_desc = serializers.CharField(required=False, allow_blank=True, allow_null=True)
														
 
															+    image_url = serializers.URLField(required=False, allow_blank=True, allow_null=True)
														
 
															+    mandatory_attrs = serializers.DictField(
														
 
															+        child=serializers.ListField(child=serializers.CharField()),
														
 
															+        required=True
														
 
															+    )
														
 
															+    model = serializers.CharField(required=False, default="llama-3.1-8b-instant")
														
 
															+    extract_additional = serializers.BooleanField(required=False, default=True)
														
 
															+    process_image = serializers.BooleanField(required=False, default=True)
														
 
															+
														
 
															+    def validate_model(self, value):
														
 
															+        from django.conf import settings
														
 
															+        if value not in settings.SUPPORTED_MODELS:
														
 
															+            raise serializers.ValidationError(
														
 
															+                f"Model must be one of {settings.SUPPORTED_MODELS}"
														
 
															+            )
														
 
															+        return value
														
 
															+
														
 
															+
														
 
															+class BatchProductRequestSerializer(serializers.Serializer):
														
 
															+    """Serializer for batch product extraction request."""
														
 
															+    products = serializers.ListField(
														
 
															+        child=ProductInputSerializer(),
														
 
															+        required=True,
														
 
															+        min_length=1
														
 
															+    )
														
 
															+    mandatory_attrs = serializers.DictField(
														
 
															+        child=serializers.ListField(child=serializers.CharField()),
														
 
															+        required=True
														
 
															+    )
														
 
															+    model = serializers.CharField(required=False, default="llama-3.1-8b-instant")
														
 
															+    extract_additional = serializers.BooleanField(required=False, default=True)
														
 
															+    process_image = serializers.BooleanField(required=False, default=True)
														
 
															+
														
 
															+    def validate_model(self, value):
														
 
															+        from django.conf import settings
														
 
															+        if value not in settings.SUPPORTED_MODELS:
														
 
															+            raise serializers.ValidationError(
														
 
															+                f"Model must be one of {settings.SUPPORTED_MODELS}"
														
 
															+            )
														
 
															+        return value
														
 
															+
														
 
															+    def validate_products(self, value):
														
 
															+        from django.conf import settings
														
 
															+        max_size = getattr(settings, 'MAX_BATCH_SIZE', 100)
														
 
															+        if len(value) > max_size:
														
 
															+            raise serializers.ValidationError(
														
 
															+                f"Batch size cannot exceed {max_size} products"
														
 
															+            )
														
 
															+        return value
														
 
															+
														
 
															+
														
 
															+class OCRResultSerializer(serializers.Serializer):
														
 
															+    """Serializer for OCR results."""
														
 
															+    detected_text = serializers.ListField(child=serializers.DictField())
														
 
															+    extracted_attributes = serializers.DictField()
														
 
															+
														
 
															+
														
 
															+class ProductAttributeResultSerializer(serializers.Serializer):
														
 
															+    """Serializer for individual product extraction result."""
														
 
															+    product_id = serializers.CharField(required=False)
														
 
															+    mandatory = serializers.DictField()
														
 
															+    additional = serializers.DictField(required=False)
														
 
															+    ocr_results = OCRResultSerializer(required=False)
														
 
															+    error = serializers.CharField(required=False)
														
 
															+    raw_output = serializers.CharField(required=False)
														
 
															+
														
 
															+
														
 
															+class BatchProductResponseSerializer(serializers.Serializer):
														
 
															+    """Serializer for batch extraction response."""
														
 
															+    results = serializers.ListField(child=ProductAttributeResultSerializer())
														
 
															+    total_products = serializers.IntegerField()
														
 
															+    successful = serializers.IntegerField()
														
 
															+    failed = serializers.IntegerField()
														
 
															+
														
 
															+
														
 
															+
														
 
															+
														
 
															+from rest_framework import serializers
														
 
															+from .models import Product
														
 
															+
														
 
															+class ProductSerializer(serializers.ModelSerializer):
														
 
															+    class Meta:
														
 
															+        model = Product
														
 
															+        fields = [
														
 
															+            'id',
														
 
															+            'item_id',
														
 
															+            'product_name',
														
 
															+            'product_long_description',
														
 
															+            'product_short_description',
														
 
															+            'product_type',
														
 
															+            'image_path',
														
 
															+            'image',
														
 
															+        ]
														
--- a/attr_extraction/services.py
+++ b/attr_extraction/services.py
@@ -0,0 +1,942 @@
 
															+# # import requests
														
 
															+# # import json
														
 
															+# # from typing import Dict, List, Optional
														
 
															+# # from django.conf import settings
														
 
															+
														
 
															+
														
 
															+# # class ProductAttributeService:
														
 
															+# #     """Service class for extracting product attributes using Groq LLM."""
														
 
															+
														
 
															+# #     @staticmethod
														
 
															+# #     def combine_product_text(
														
 
															+# #         title: Optional[str] = None,
														
 
															+# #         short_desc: Optional[str] = None,
														
 
															+# #         long_desc: Optional[str] = None
														
 
															+# #     ) -> str:
														
 
															+# #         """Combine product metadata into a single text block."""
														
 
															+# #         parts = []
														
 
															+# #         if title:
														
 
															+# #             parts.append(str(title).strip())
														
 
															+# #         if short_desc:
														
 
															+# #             parts.append(str(short_desc).strip())
														
 
															+# #         if long_desc:
														
 
															+# #             parts.append(str(long_desc).strip())
														
 
															+
														
 
															+# #         combined = " ".join(parts).strip()
														
 
															+
														
 
															+# #         if not combined:
														
 
															+# #             return "No product information available"
														
 
															+
														
 
															+# #         return combined
														
 
															+
														
 
															+# #     @staticmethod
														
 
															+# #     def extract_attributes(
														
 
															+# #         product_text: str,
														
 
															+# #         mandatory_attrs: Dict[str, List[str]],
														
 
															+# #         model: str = None,
														
 
															+# #         extract_additional: bool = True
														
 
															+# #     ) -> dict:
														
 
															+# #         """Use Groq LLM to extract attributes from any product type."""
														
 
															+        
														
 
															+# #         if model is None:
														
 
															+# #             model = settings.SUPPORTED_MODELS[0]
														
 
															+
														
 
															+# #         # Check if product text is empty or minimal
														
 
															+# #         if not product_text or product_text == "No product information available":
														
 
															+# #             return ProductAttributeService._create_error_response(
														
 
															+# #                 "No product information provided",
														
 
															+# #                 mandatory_attrs,
														
 
															+# #                 extract_additional
														
 
															+# #             )
														
 
															+
														
 
															+# #         # Create structured prompt for mandatory attributes
														
 
															+# #         mandatory_attr_list = []
														
 
															+# #         for attr_name, allowed_values in mandatory_attrs.items():
														
 
															+# #             mandatory_attr_list.append(f"{attr_name}: {', '.join(allowed_values)}")
														
 
															+# #         mandatory_attr_text = "\n".join(mandatory_attr_list)
														
 
															+
														
 
															+# #         additional_instruction = ""
														
 
															+# #         if extract_additional:
														
 
															+# #             additional_instruction = """
														
 
															+# # 2. Extract ADDITIONAL attributes: Identify any other relevant attributes from the product text 
														
 
															+# #    (such as Material, Size, Color, Brand, Dimensions, Weight, Features, Specifications, etc.) 
														
 
															+# #    and their values. Extract attributes that are specific and relevant to this product type."""
														
 
															+
														
 
															+# #         output_format = {
														
 
															+# #             "mandatory": {attr: "value" for attr in mandatory_attrs.keys()},
														
 
															+# #             "additional": {} if extract_additional else None
														
 
															+# #         }
														
 
															+
														
 
															+# #         if not extract_additional:
														
 
															+# #             output_format.pop("additional")
														
 
															+
														
 
															+# #         prompt = f"""
														
 
															+# # You are an intelligent product attribute extractor that works with ANY product type.
														
 
															+
														
 
															+# # TASK:
														
 
															+# # 1. Extract MANDATORY attributes: For each mandatory attribute, select the most appropriate value 
														
 
															+# #    from the provided list. Choose the value that best matches the product description.
														
 
															+# # {additional_instruction}
														
 
															+
														
 
															+# # Product Text:
														
 
															+# # {product_text}
														
 
															+
														
 
															+# # Mandatory Attribute Lists (MUST select one value for each):
														
 
															+# # {mandatory_attr_text}
														
 
															+
														
 
															+# # CRITICAL INSTRUCTIONS:
														
 
															+# # - Return ONLY valid JSON, nothing else
														
 
															+# # - No explanations, no markdown, no text before or after the JSON
														
 
															+# # - For mandatory attributes, choose EXACTLY ONE value from the provided list that best matches
														
 
															+# # - If a mandatory attribute cannot be determined from the product text, use "Not Specified"
														
 
															+# # - Work with whatever information is available - the product text may be incomplete (only title, or only description, etc.)
														
 
															+# # {f"- For additional attributes, extract any relevant information found in the product text" if extract_additional else ""}
														
 
															+# # - Be precise and only extract information that is explicitly stated or clearly implied
														
 
															+
														
 
															+# # Required Output Format (ONLY THIS, NO OTHER TEXT):
														
 
															+# # {json.dumps(output_format, indent=2)}
														
 
															+# #         """
														
 
															+
														
 
															+# #         payload = {
														
 
															+# #             "model": model,
														
 
															+# #             "messages": [
														
 
															+# #                 {
														
 
															+# #                     "role": "system",
														
 
															+# #                     "content": f"You are a precise attribute extraction model. Return ONLY valid JSON with {'mandatory and additional' if extract_additional else 'mandatory'} sections. No explanations, no markdown, no other text."
														
 
															+# #                 },
														
 
															+# #                 {"role": "user", "content": prompt}
														
 
															+# #             ],
														
 
															+# #             "temperature": 0.0,
														
 
															+# #             "max_tokens": 1500
														
 
															+# #         }
														
 
															+
														
 
															+# #         headers = {
														
 
															+# #             "Authorization": f"Bearer {settings.GROQ_API_KEY}",
														
 
															+# #             "Content-Type": "application/json",
														
 
															+# #         }
														
 
															+
														
 
															+# #         try:
														
 
															+# #             response = requests.post(
														
 
															+# #                 settings.GROQ_API_URL,
														
 
															+# #                 headers=headers,
														
 
															+# #                 json=payload,
														
 
															+# #                 timeout=30
														
 
															+# #             )
														
 
															+# #             response.raise_for_status()
														
 
															+# #             result_text = response.json()["choices"][0]["message"]["content"].strip()
														
 
															+
														
 
															+# #             # Clean the response
														
 
															+# #             result_text = ProductAttributeService._clean_json_response(result_text)
														
 
															+
														
 
															+# #             # Parse JSON
														
 
															+# #             parsed = json.loads(result_text)
														
 
															+
														
 
															+# #             # Validate and restructure if needed
														
 
															+# #             parsed = ProductAttributeService._validate_response_structure(
														
 
															+# #                 parsed, mandatory_attrs, extract_additional
														
 
															+# #             )
														
 
															+
														
 
															+# #             return parsed
														
 
															+
														
 
															+# #         except requests.exceptions.RequestException as e:
														
 
															+# #             return ProductAttributeService._create_error_response(
														
 
															+# #                 str(e), mandatory_attrs, extract_additional
														
 
															+# #             )
														
 
															+# #         except json.JSONDecodeError as e:
														
 
															+# #             return ProductAttributeService._create_error_response(
														
 
															+# #                 f"Invalid JSON: {str(e)}", mandatory_attrs, extract_additional, result_text
														
 
															+# #             )
														
 
															+# #         except Exception as e:
														
 
															+# #             return ProductAttributeService._create_error_response(
														
 
															+# #                 str(e), mandatory_attrs, extract_additional
														
 
															+# #             )
														
 
															+
														
 
															+# #     @staticmethod
														
 
															+# #     def _clean_json_response(text: str) -> str:
														
 
															+# #         """Clean LLM response to extract valid JSON."""
														
 
															+# #         start_idx = text.find('{')
														
 
															+# #         end_idx = text.rfind('}')
														
 
															+
														
 
															+# #         if start_idx != -1 and end_idx != -1:
														
 
															+# #             text = text[start_idx:end_idx + 1]
														
 
															+
														
 
															+# #         if "```json" in text:
														
 
															+# #             text = text.split("```json")[1].split("```")[0].strip()
														
 
															+# #         elif "```" in text:
														
 
															+# #             text = text.split("```")[1].split("```")[0].strip()
														
 
															+# #             if text.startswith("json"):
														
 
															+# #                 text = text[4:].strip()
														
 
															+
														
 
															+# #         return text
														
 
															+
														
 
															+# #     @staticmethod
														
 
															+# #     def _validate_response_structure(
														
 
															+# #         parsed: dict,
														
 
															+# #         mandatory_attrs: Dict[str, List[str]],
														
 
															+# #         extract_additional: bool
														
 
															+# #     ) -> dict:
														
 
															+# #         """Validate and fix the response structure."""
														
 
															+# #         expected_sections = ["mandatory"]
														
 
															+# #         if extract_additional:
														
 
															+# #             expected_sections.append("additional")
														
 
															+
														
 
															+# #         if not all(section in parsed for section in expected_sections):
														
 
															+# #             if isinstance(parsed, dict):
														
 
															+# #                 mandatory_keys = set(mandatory_attrs.keys())
														
 
															+# #                 mandatory = {k: v for k, v in parsed.items() if k in mandatory_keys}
														
 
															+# #                 additional = {k: v for k, v in parsed.items() if k not in mandatory_keys}
														
 
															+
														
 
															+# #                 result = {"mandatory": mandatory}
														
 
															+# #                 if extract_additional:
														
 
															+# #                     result["additional"] = additional
														
 
															+# #                 return result
														
 
															+# #             else:
														
 
															+# #                 return ProductAttributeService._create_error_response(
														
 
															+# #                     "Invalid response structure",
														
 
															+# #                     mandatory_attrs,
														
 
															+# #                     extract_additional,
														
 
															+# #                     str(parsed)
														
 
															+# #                 )
														
 
															+
														
 
															+# #         return parsed
														
 
															+
														
 
															+# #     @staticmethod
														
 
															+# #     def _create_error_response(
														
 
															+# #         error: str,
														
 
															+# #         mandatory_attrs: Dict[str, List[str]],
														
 
															+# #         extract_additional: bool,
														
 
															+# #         raw_output: Optional[str] = None
														
 
															+# #     ) -> dict:
														
 
															+# #         """Create a standardized error response."""
														
 
															+# #         response = {
														
 
															+# #             "mandatory": {attr: "Not Specified" for attr in mandatory_attrs.keys()},
														
 
															+# #             "error": error
														
 
															+# #         }
														
 
															+# #         if extract_additional:
														
 
															+# #             response["additional"] = {}
														
 
															+# #         if raw_output:
														
 
															+# #             response["raw_output"] = raw_output
														
 
															+# #         return response
														
 
															+
														
 
															+
														
 
															+
														
 
															+
														
 
															+
														
 
															+# import requests
														
 
															+# import json
														
 
															+# from typing import Dict, List, Optional
														
 
															+# from django.conf import settings
														
 
															+# from concurrent.futures import ThreadPoolExecutor, as_completed
														
 
															+
														
 
															+
														
 
															+# class ProductAttributeService:
														
 
															+#     """Service class for extracting product attributes using Groq LLM."""
														
 
															+
														
 
															+#     @staticmethod
														
 
															+#     def combine_product_text(
														
 
															+#         title: Optional[str] = None,
														
 
															+#         short_desc: Optional[str] = None,
														
 
															+#         long_desc: Optional[str] = None
														
 
															+#     ) -> str:
														
 
															+#         """Combine product metadata into a single text block."""
														
 
															+#         parts = []
														
 
															+#         if title:
														
 
															+#             parts.append(str(title).strip())
														
 
															+#         if short_desc:
														
 
															+#             parts.append(str(short_desc).strip())
														
 
															+#         if long_desc:
														
 
															+#             parts.append(str(long_desc).strip())
														
 
															+
														
 
															+#         combined = " ".join(parts).strip()
														
 
															+
														
 
															+#         if not combined:
														
 
															+#             return "No product information available"
														
 
															+
														
 
															+#         return combined
														
 
															+
														
 
															+#     @staticmethod
														
 
															+#     def extract_attributes(
														
 
															+#         product_text: str,
														
 
															+#         mandatory_attrs: Dict[str, List[str]],
														
 
															+#         model: str = None,
														
 
															+#         extract_additional: bool = True
														
 
															+#     ) -> dict:
														
 
															+#         """Use Groq LLM to extract attributes from any product type."""
														
 
															+        
														
 
															+#         if model is None:
														
 
															+#             model = settings.SUPPORTED_MODELS[0]
														
 
															+
														
 
															+#         # Check if product text is empty or minimal
														
 
															+#         if not product_text or product_text == "No product information available":
														
 
															+#             return ProductAttributeService._create_error_response(
														
 
															+#                 "No product information provided",
														
 
															+#                 mandatory_attrs,
														
 
															+#                 extract_additional
														
 
															+#             )
														
 
															+
														
 
															+#         # Create structured prompt for mandatory attributes
														
 
															+#         mandatory_attr_list = []
														
 
															+#         for attr_name, allowed_values in mandatory_attrs.items():
														
 
															+#             mandatory_attr_list.append(f"{attr_name}: {', '.join(allowed_values)}")
														
 
															+#         mandatory_attr_text = "\n".join(mandatory_attr_list)
														
 
															+
														
 
															+#         additional_instruction = ""
														
 
															+#         if extract_additional:
														
 
															+#             additional_instruction = """
														
 
															+# 2. Extract ADDITIONAL attributes: Identify any other relevant attributes from the product text 
														
 
															+#    (such as Material, Size, Color, Brand, Dimensions, Weight, Features, Specifications, etc.) 
														
 
															+#    and their values. Extract attributes that are specific and relevant to this product type."""
														
 
															+
														
 
															+#         output_format = {
														
 
															+#             "mandatory": {attr: "value" for attr in mandatory_attrs.keys()},
														
 
															+#             "additional": {} if extract_additional else None
														
 
															+#         }
														
 
															+
														
 
															+#         if not extract_additional:
														
 
															+#             output_format.pop("additional")
														
 
															+
														
 
															+#         prompt = f"""
														
 
															+# You are an intelligent product attribute extractor that works with ANY product type.
														
 
															+
														
 
															+# TASK:
														
 
															+# 1. Extract MANDATORY attributes: For each mandatory attribute, select the most appropriate value 
														
 
															+#    from the provided list. Choose the value that best matches the product description.
														
 
															+# {additional_instruction}
														
 
															+
														
 
															+# Product Text:
														
 
															+# {product_text}
														
 
															+
														
 
															+# Mandatory Attribute Lists (MUST select one value for each):
														
 
															+# {mandatory_attr_text}
														
 
															+
														
 
															+# CRITICAL INSTRUCTIONS:
														
 
															+# - Return ONLY valid JSON, nothing else
														
 
															+# - No explanations, no markdown, no text before or after the JSON
														
 
															+# - For mandatory attributes, choose EXACTLY ONE value from the provided list that best matches
														
 
															+# - If a mandatory attribute cannot be determined from the product text, use "Not Specified"
														
 
															+# - Work with whatever information is available - the product text may be incomplete (only title, or only description, etc.)
														
 
															+# {f"- For additional attributes, extract any relevant information found in the product text" if extract_additional else ""}
														
 
															+# - Be precise and only extract information that is explicitly stated or clearly implied
														
 
															+
														
 
															+# Required Output Format (ONLY THIS, NO OTHER TEXT):
														
 
															+# {json.dumps(output_format, indent=2)}
														
 
															+#         """
														
 
															+
														
 
															+#         payload = {
														
 
															+#             "model": model,
														
 
															+#             "messages": [
														
 
															+#                 {
														
 
															+#                     "role": "system",
														
 
															+#                     "content": f"You are a precise attribute extraction model. Return ONLY valid JSON with {'mandatory and additional' if extract_additional else 'mandatory'} sections. No explanations, no markdown, no other text."
														
 
															+#                 },
														
 
															+#                 {"role": "user", "content": prompt}
														
 
															+#             ],
														
 
															+#             "temperature": 0.0,
														
 
															+#             "max_tokens": 1500
														
 
															+#         }
														
 
															+
														
 
															+#         headers = {
														
 
															+#             "Authorization": f"Bearer {settings.GROQ_API_KEY}",
														
 
															+#             "Content-Type": "application/json",
														
 
															+#         }
														
 
															+
														
 
															+#         try:
														
 
															+#             response = requests.post(
														
 
															+#                 settings.GROQ_API_URL,
														
 
															+#                 headers=headers,
														
 
															+#                 json=payload,
														
 
															+#                 timeout=30
														
 
															+#             )
														
 
															+#             response.raise_for_status()
														
 
															+#             result_text = response.json()["choices"][0]["message"]["content"].strip()
														
 
															+
														
 
															+#             # Clean the response
														
 
															+#             result_text = ProductAttributeService._clean_json_response(result_text)
														
 
															+
														
 
															+#             # Parse JSON
														
 
															+#             parsed = json.loads(result_text)
														
 
															+
														
 
															+#             # Validate and restructure if needed
														
 
															+#             parsed = ProductAttributeService._validate_response_structure(
														
 
															+#                 parsed, mandatory_attrs, extract_additional
														
 
															+#             )
														
 
															+
														
 
															+#             return parsed
														
 
															+
														
 
															+#         except requests.exceptions.RequestException as e:
														
 
															+#             return ProductAttributeService._create_error_response(
														
 
															+#                 str(e), mandatory_attrs, extract_additional
														
 
															+#             )
														
 
															+#         except json.JSONDecodeError as e:
														
 
															+#             return ProductAttributeService._create_error_response(
														
 
															+#                 f"Invalid JSON: {str(e)}", mandatory_attrs, extract_additional, result_text
														
 
															+#             )
														
 
															+#         except Exception as e:
														
 
															+#             return ProductAttributeService._create_error_response(
														
 
															+#                 str(e), mandatory_attrs, extract_additional
														
 
															+#             )
														
 
															+
														
 
															+#     @staticmethod
														
 
															+#     def extract_attributes_batch(
														
 
															+#         products: List[Dict],
														
 
															+#         mandatory_attrs: Dict[str, List[str]],
														
 
															+#         model: str = None,
														
 
															+#         extract_additional: bool = True,
														
 
															+#         max_workers: int = 5
														
 
															+#     ) -> Dict:
														
 
															+#         """
														
 
															+#         Extract attributes for multiple products in parallel.
														
 
															+        
														
 
															+#         Args:
														
 
															+#             products: List of product dictionaries with keys: product_id, title, short_desc, long_desc
														
 
															+#             mandatory_attrs: Dictionary of mandatory attributes
														
 
															+#             model: Groq model to use
														
 
															+#             extract_additional: Whether to extract additional attributes
														
 
															+#             max_workers: Maximum number of parallel workers
														
 
															+            
														
 
															+#         Returns:
														
 
															+#             Dictionary with results, total_products, successful, and failed counts
														
 
															+#         """
														
 
															+#         results = []
														
 
															+#         successful = 0
														
 
															+#         failed = 0
														
 
															+
														
 
															+#         def process_product(product_data):
														
 
															+#             """Process a single product."""
														
 
															+#             product_id = product_data.get('product_id', f"product_{len(results)}")
														
 
															+            
														
 
															+#             try:
														
 
															+#                 product_text = ProductAttributeService.combine_product_text(
														
 
															+#                     title=product_data.get('title'),
														
 
															+#                     short_desc=product_data.get('short_desc'),
														
 
															+#                     long_desc=product_data.get('long_desc')
														
 
															+#                 )
														
 
															+                
														
 
															+#                 result = ProductAttributeService.extract_attributes(
														
 
															+#                     product_text=product_text,
														
 
															+#                     mandatory_attrs=mandatory_attrs,
														
 
															+#                     model=model,
														
 
															+#                     extract_additional=extract_additional
														
 
															+#                 )
														
 
															+                
														
 
															+#                 result['product_id'] = product_id
														
 
															+                
														
 
															+#                 # Check if extraction was successful
														
 
															+#                 if 'error' not in result:
														
 
															+#                     return result, True
														
 
															+#                 else:
														
 
															+#                     return result, False
														
 
															+                    
														
 
															+#             except Exception as e:
														
 
															+#                 return {
														
 
															+#                     'product_id': product_id,
														
 
															+#                     'mandatory': {attr: "Not Specified" for attr in mandatory_attrs.keys()},
														
 
															+#                     'additional': {} if extract_additional else None,
														
 
															+#                     'error': f"Processing error: {str(e)}"
														
 
															+#                 }, False
														
 
															+
														
 
															+#         # Process products in parallel
														
 
															+#         with ThreadPoolExecutor(max_workers=max_workers) as executor:
														
 
															+#             future_to_product = {
														
 
															+#                 executor.submit(process_product, product): product 
														
 
															+#                 for product in products
														
 
															+#             }
														
 
															+            
														
 
															+#             for future in as_completed(future_to_product):
														
 
															+#                 try:
														
 
															+#                     result, success = future.result()
														
 
															+#                     results.append(result)
														
 
															+#                     if success:
														
 
															+#                         successful += 1
														
 
															+#                     else:
														
 
															+#                         failed += 1
														
 
															+#                 except Exception as e:
														
 
															+#                     failed += 1
														
 
															+#                     results.append({
														
 
															+#                         'product_id': 'unknown',
														
 
															+#                         'mandatory': {attr: "Not Specified" for attr in mandatory_attrs.keys()},
														
 
															+#                         'additional': {} if extract_additional else None,
														
 
															+#                         'error': f"Unexpected error: {str(e)}"
														
 
															+#                     })
														
 
															+
														
 
															+#         return {
														
 
															+#             'results': results,
														
 
															+#             'total_products': len(products),
														
 
															+#             'successful': successful,
														
 
															+#             'failed': failed
														
 
															+#         }
														
 
															+
														
 
															+#     @staticmethod
														
 
															+#     def _clean_json_response(text: str) -> str:
														
 
															+#         """Clean LLM response to extract valid JSON."""
														
 
															+#         start_idx = text.find('{')
														
 
															+#         end_idx = text.rfind('}')
														
 
															+
														
 
															+#         if start_idx != -1 and end_idx != -1:
														
 
															+#             text = text[start_idx:end_idx + 1]
														
 
															+
														
 
															+#         if "```json" in text:
														
 
															+#             text = text.split("```json")[1].split("```")[0].strip()
														
 
															+#         elif "```" in text:
														
 
															+#             text = text.split("```")[1].split("```")[0].strip()
														
 
															+#             if text.startswith("json"):
														
 
															+#                 text = text[4:].strip()
														
 
															+
														
 
															+#         return text
														
 
															+
														
 
															+#     @staticmethod
														
 
															+#     def _validate_response_structure(
														
 
															+#         parsed: dict,
														
 
															+#         mandatory_attrs: Dict[str, List[str]],
														
 
															+#         extract_additional: bool
														
 
															+#     ) -> dict:
														
 
															+#         """Validate and fix the response structure."""
														
 
															+#         expected_sections = ["mandatory"]
														
 
															+#         if extract_additional:
														
 
															+#             expected_sections.append("additional")
														
 
															+
														
 
															+#         if not all(section in parsed for section in expected_sections):
														
 
															+#             if isinstance(parsed, dict):
														
 
															+#                 mandatory_keys = set(mandatory_attrs.keys())
														
 
															+#                 mandatory = {k: v for k, v in parsed.items() if k in mandatory_keys}
														
 
															+#                 additional = {k: v for k, v in parsed.items() if k not in mandatory_keys}
														
 
															+
														
 
															+#                 result = {"mandatory": mandatory}
														
 
															+#                 if extract_additional:
														
 
															+#                     result["additional"] = additional
														
 
															+#                 return result
														
 
															+#             else:
														
 
															+#                 return ProductAttributeService._create_error_response(
														
 
															+#                     "Invalid response structure",
														
 
															+#                     mandatory_attrs,
														
 
															+#                     extract_additional,
														
 
															+#                     str(parsed)
														
 
															+#                 )
														
 
															+
														
 
															+#         return parsed
														
 
															+
														
 
															+#     @staticmethod
														
 
															+#     def _create_error_response(
														
 
															+#         error: str,
														
 
															+#         mandatory_attrs: Dict[str, List[str]],
														
 
															+#         extract_additional: bool,
														
 
															+#         raw_output: Optional[str] = None
														
 
															+#     ) -> dict:
														
 
															+#         """Create a standardized error response."""
														
 
															+#         response = {
														
 
															+#             "mandatory": {attr: "Not Specified" for attr in mandatory_attrs.keys()},
														
 
															+#             "error": error
														
 
															+#         }
														
 
															+#         if extract_additional:
														
 
															+#             response["additional"] = {}
														
 
															+#         if raw_output:
														
 
															+#             response["raw_output"] = raw_output
														
 
															+#         return response
														
 
															+
														
 
															+
														
 
															+
														
 
															+
														
 
															+
														
 
															+
														
 
															+# ==================== services.py ====================
														
 
															+import requests
														
 
															+import json
														
 
															+from typing import Dict, List, Optional
														
 
															+from django.conf import settings
														
 
															+from concurrent.futures import ThreadPoolExecutor, as_completed
														
 
															+from .ocr_service import OCRService
														
 
															+
														
 
															+
														
 
															+class ProductAttributeService:
														
 
															+    """Service class for extracting product attributes using Groq LLM."""
														
 
															+
														
 
															+    @staticmethod
														
 
															+    def combine_product_text(
														
 
															+        title: Optional[str] = None,
														
 
															+        short_desc: Optional[str] = None,
														
 
															+        long_desc: Optional[str] = None,
														
 
															+        ocr_text: Optional[str] = None
														
 
															+    ) -> str:
														
 
															+        """Combine product metadata into a single text block."""
														
 
															+        parts = []
														
 
															+        if title:
														
 
															+            parts.append(f"Title: {str(title).strip()}")
														
 
															+        if short_desc:
														
 
															+            parts.append(f"Description: {str(short_desc).strip()}")
														
 
															+        if long_desc:
														
 
															+            parts.append(f"Details: {str(long_desc).strip()}")
														
 
															+        if ocr_text:
														
 
															+            parts.append(f"OCR Text: {ocr_text}")
														
 
															+        
														
 
															+        combined = "\n".join(parts).strip()
														
 
															+        
														
 
															+        if not combined:
														
 
															+            return "No product information available"
														
 
															+        
														
 
															+        return combined
														
 
															+
														
 
															+    @staticmethod
														
 
															+    def extract_attributes_from_ocr(ocr_results: Dict, model: str = None) -> Dict:
														
 
															+        """Extract structured attributes from OCR text using LLM."""
														
 
															+        if model is None:
														
 
															+            model = settings.SUPPORTED_MODELS[0]
														
 
															+        
														
 
															+        detected_text = ocr_results.get('detected_text', [])
														
 
															+        if not detected_text:
														
 
															+            return {}
														
 
															+        
														
 
															+        # Format OCR text for prompt
														
 
															+        ocr_text = "\n".join([f"Text: {item['text']}, Confidence: {item['confidence']:.2f}" 
														
 
															+                              for item in detected_text])
														
 
															+        
														
 
															+        prompt = f"""
														
 
															+You are an AI model that extracts structured attributes from OCR text detected on product images.
														
 
															+Given the OCR detections below, infer the possible product attributes and return them as a clean JSON object.
														
 
															+
														
 
															+OCR Text:
														
 
															+{ocr_text}
														
 
															+
														
 
															+Extract relevant attributes like:
														
 
															+- brand
														
 
															+- model_number
														
 
															+- size (waist_size, length, etc.)
														
 
															+- collection
														
 
															+- any other relevant product information
														
 
															+
														
 
															+Return a JSON object with only the attributes you can confidently identify.
														
 
															+If an attribute is not present, do not include it in the response.
														
 
															+"""
														
 
															+        
														
 
															+        payload = {
														
 
															+            "model": model,
														
 
															+            "messages": [
														
 
															+                {
														
 
															+                    "role": "system",
														
 
															+                    "content": "You are a helpful AI that extracts structured data from OCR output. Return only valid JSON."
														
 
															+                },
														
 
															+                {"role": "user", "content": prompt}
														
 
															+            ],
														
 
															+            "temperature": 0.2,
														
 
															+            "max_tokens": 500
														
 
															+        }
														
 
															+        
														
 
															+        headers = {
														
 
															+            "Authorization": f"Bearer {settings.GROQ_API_KEY}",
														
 
															+            "Content-Type": "application/json",
														
 
															+        }
														
 
															+        
														
 
															+        try:
														
 
															+            response = requests.post(
														
 
															+                settings.GROQ_API_URL,
														
 
															+                headers=headers,
														
 
															+                json=payload,
														
 
															+                timeout=30
														
 
															+            )
														
 
															+            response.raise_for_status()
														
 
															+            result_text = response.json()["choices"][0]["message"]["content"].strip()
														
 
															+            
														
 
															+            # Clean and parse JSON
														
 
															+            result_text = ProductAttributeService._clean_json_response(result_text)
														
 
															+            parsed = json.loads(result_text)
														
 
															+            
														
 
															+            return parsed
														
 
															+        except Exception as e:
														
 
															+            return {"error": f"Failed to extract attributes from OCR: {str(e)}"}
														
 
															+
														
 
															+    @staticmethod
														
 
															+    def extract_attributes(
														
 
															+        product_text: str,
														
 
															+        mandatory_attrs: Dict[str, List[str]],
														
 
															+        model: str = None,
														
 
															+        extract_additional: bool = True
														
 
															+    ) -> dict:
														
 
															+        """Use Groq LLM to extract attributes from any product type."""
														
 
															+        
														
 
															+        if model is None:
														
 
															+            model = settings.SUPPORTED_MODELS[0]
														
 
															+
														
 
															+        # Check if product text is empty or minimal
														
 
															+        if not product_text or product_text == "No product information available":
														
 
															+            return ProductAttributeService._create_error_response(
														
 
															+                "No product information provided",
														
 
															+                mandatory_attrs,
														
 
															+                extract_additional
														
 
															+            )
														
 
															+
														
 
															+        # Create structured prompt for mandatory attributes
														
 
															+        mandatory_attr_list = []
														
 
															+        for attr_name, allowed_values in mandatory_attrs.items():
														
 
															+            mandatory_attr_list.append(f"{attr_name}: {', '.join(allowed_values)}")
														
 
															+        mandatory_attr_text = "\n".join(mandatory_attr_list)
														
 
															+
														
 
															+        additional_instruction = ""
														
 
															+        if extract_additional:
														
 
															+            additional_instruction = """
														
 
															+2. Extract ADDITIONAL attributes: Identify any other relevant attributes from the product text 
														
 
															+   (such as Material, Size, Color, Brand, Dimensions, Weight, Features, Specifications, etc.) 
														
 
															+   and their values. Extract attributes that are specific and relevant to this product type."""
														
 
															+
														
 
															+        output_format = {
														
 
															+            "mandatory": {attr: "value" for attr in mandatory_attrs.keys()},
														
 
															+            "additional": {} if extract_additional else None
														
 
															+        }
														
 
															+
														
 
															+        if not extract_additional:
														
 
															+            output_format.pop("additional")
														
 
															+
														
 
															+        prompt = f"""
														
 
															+You are an intelligent product attribute extractor that works with ANY product type.
														
 
															+
														
 
															+TASK:
														
 
															+1. Extract MANDATORY attributes: For each mandatory attribute, select the most appropriate value 
														
 
															+   from the provided list. Choose the value that best matches the product description.
														
 
															+{additional_instruction}
														
 
															+
														
 
															+Product Text:
														
 
															+{product_text}
														
 
															+
														
 
															+Mandatory Attribute Lists (MUST select one value for each):
														
 
															+{mandatory_attr_text}
														
 
															+
														
 
															+CRITICAL INSTRUCTIONS:
														
 
															+- Return ONLY valid JSON, nothing else
														
 
															+- No explanations, no markdown, no text before or after the JSON
														
 
															+- For mandatory attributes, choose EXACTLY ONE value from the provided list that best matches
														
 
															+- If a mandatory attribute cannot be determined from the product text, use "Not Specified"
														
 
															+- Work with whatever information is available - the product text may be incomplete
														
 
															+{f"- For additional attributes, extract any relevant information found in the product text" if extract_additional else ""}
														
 
															+- Be precise and only extract information that is explicitly stated or clearly implied
														
 
															+
														
 
															+Required Output Format (ONLY THIS, NO OTHER TEXT):
														
 
															+{json.dumps(output_format, indent=2)}
														
 
															+        """
														
 
															+
														
 
															+        payload = {
														
 
															+            "model": model,
														
 
															+            "messages": [
														
 
															+                {
														
 
															+                    "role": "system",
														
 
															+                    "content": f"You are a precise attribute extraction model. Return ONLY valid JSON with {'mandatory and additional' if extract_additional else 'mandatory'} sections. No explanations, no markdown, no other text."
														
 
															+                },
														
 
															+                {"role": "user", "content": prompt}
														
 
															+            ],
														
 
															+            "temperature": 0.0,
														
 
															+            "max_tokens": 1500
														
 
															+        }
														
 
															+
														
 
															+        headers = {
														
 
															+            "Authorization": f"Bearer {settings.GROQ_API_KEY}",
														
 
															+            "Content-Type": "application/json",
														
 
															+        }
														
 
															+
														
 
															+        try:
														
 
															+            response = requests.post(
														
 
															+                settings.GROQ_API_URL,
														
 
															+                headers=headers,
														
 
															+                json=payload,
														
 
															+                timeout=30
														
 
															+            )
														
 
															+            response.raise_for_status()
														
 
															+            result_text = response.json()["choices"][0]["message"]["content"].strip()
														
 
															+
														
 
															+            # Clean the response
														
 
															+            result_text = ProductAttributeService._clean_json_response(result_text)
														
 
															+
														
 
															+            # Parse JSON
														
 
															+            parsed = json.loads(result_text)
														
 
															+
														
 
															+            # Validate and restructure if needed
														
 
															+            parsed = ProductAttributeService._validate_response_structure(
														
 
															+                parsed, mandatory_attrs, extract_additional
														
 
															+            )
														
 
															+
														
 
															+            return parsed
														
 
															+
														
 
															+        except requests.exceptions.RequestException as e:
														
 
															+            return ProductAttributeService._create_error_response(
														
 
															+                str(e), mandatory_attrs, extract_additional
														
 
															+            )
														
 
															+        except json.JSONDecodeError as e:
														
 
															+            return ProductAttributeService._create_error_response(
														
 
															+                f"Invalid JSON: {str(e)}", mandatory_attrs, extract_additional, result_text
														
 
															+            )
														
 
															+        except Exception as e:
														
 
															+            return ProductAttributeService._create_error_response(
														
 
															+                str(e), mandatory_attrs, extract_additional
														
 
															+            )
														
 
															+
														
 
															+    @staticmethod
														
 
															+    def extract_attributes_batch(
														
 
															+        products: List[Dict],
														
 
															+        mandatory_attrs: Dict[str, List[str]],
														
 
															+        model: str = None,
														
 
															+        extract_additional: bool = True,
														
 
															+        process_image: bool = True,
														
 
															+        max_workers: int = 5
														
 
															+    ) -> Dict:
														
 
															+        """Extract attributes for multiple products in parallel."""
														
 
															+        results = []
														
 
															+        successful = 0
														
 
															+        failed = 0
														
 
															+        
														
 
															+        ocr_service = OCRService()
														
 
															+
														
 
															+        def process_product(product_data):
														
 
															+            """Process a single product."""
														
 
															+            product_id = product_data.get('product_id', f"product_{len(results)}")
														
 
															+            
														
 
															+            try:
														
 
															+                # Process image if URL is provided
														
 
															+                ocr_results = None
														
 
															+                ocr_text = None
														
 
															+                
														
 
															+                if process_image and product_data.get('image_url'):
														
 
															+                    ocr_results = ocr_service.process_image(product_data['image_url'])
														
 
															+                    
														
 
															+                    # Extract attributes from OCR
														
 
															+                    if ocr_results and ocr_results.get('detected_text'):
														
 
															+                        ocr_attrs = ProductAttributeService.extract_attributes_from_ocr(
														
 
															+                            ocr_results, model
														
 
															+                        )
														
 
															+                        ocr_results['extracted_attributes'] = ocr_attrs
														
 
															+                        
														
 
															+                        # Format OCR text for combining with product text
														
 
															+                        ocr_text = "\n".join([
														
 
															+                            f"{item['text']} (confidence: {item['confidence']:.2f})"
														
 
															+                            for item in ocr_results['detected_text']
														
 
															+                        ])
														
 
															+                
														
 
															+                # Combine all product information
														
 
															+                product_text = ProductAttributeService.combine_product_text(
														
 
															+                    title=product_data.get('title'),
														
 
															+                    short_desc=product_data.get('short_desc'),
														
 
															+                    long_desc=product_data.get('long_desc'),
														
 
															+                    ocr_text=ocr_text
														
 
															+                )
														
 
															+                
														
 
															+                # Extract attributes from combined text
														
 
															+                result = ProductAttributeService.extract_attributes(
														
 
															+                    product_text=product_text,
														
 
															+                    mandatory_attrs=mandatory_attrs,
														
 
															+                    model=model,
														
 
															+                    extract_additional=extract_additional
														
 
															+                )
														
 
															+                
														
 
															+                result['product_id'] = product_id
														
 
															+                
														
 
															+                # Add OCR results if available
														
 
															+                if ocr_results:
														
 
															+                    result['ocr_results'] = ocr_results
														
 
															+                
														
 
															+                # Check if extraction was successful
														
 
															+                if 'error' not in result:
														
 
															+                    return result, True
														
 
															+                else:
														
 
															+                    return result, False
														
 
															+                    
														
 
															+            except Exception as e:
														
 
															+                return {
														
 
															+                    'product_id': product_id,
														
 
															+                    'mandatory': {attr: "Not Specified" for attr in mandatory_attrs.keys()},
														
 
															+                    'additional': {} if extract_additional else None,
														
 
															+                    'error': f"Processing error: {str(e)}"
														
 
															+                }, False
														
 
															+
														
 
															+        # Process products in parallel
														
 
															+        with ThreadPoolExecutor(max_workers=max_workers) as executor:
														
 
															+            future_to_product = {
														
 
															+                executor.submit(process_product, product): product 
														
 
															+                for product in products
														
 
															+            }
														
 
															+            
														
 
															+            for future in as_completed(future_to_product):
														
 
															+                try:
														
 
															+                    result, success = future.result()
														
 
															+                    results.append(result)
														
 
															+                    if success:
														
 
															+                        successful += 1
														
 
															+                    else:
														
 
															+                        failed += 1
														
 
															+                except Exception as e:
														
 
															+                    failed += 1
														
 
															+                    results.append({
														
 
															+                        'product_id': 'unknown',
														
 
															+                        'mandatory': {attr: "Not Specified" for attr in mandatory_attrs.keys()},
														
 
															+                        'additional': {} if extract_additional else None,
														
 
															+                        'error': f"Unexpected error: {str(e)}"
														
 
															+                    })
														
 
															+
														
 
															+        return {
														
 
															+            'results': results,
														
 
															+            'total_products': len(products),
														
 
															+            'successful': successful,
														
 
															+            'failed': failed
														
 
															+        }
														
 
															+
														
 
															+    @staticmethod
														
 
															+    def _clean_json_response(text: str) -> str:
														
 
															+        """Clean LLM response to extract valid JSON."""
														
 
															+        start_idx = text.find('{')
														
 
															+        end_idx = text.rfind('}')
														
 
															+
														
 
															+        if start_idx != -1 and end_idx != -1:
														
 
															+            text = text[start_idx:end_idx + 1]
														
 
															+
														
 
															+        if "```json" in text:
														
 
															+            text = text.split("```json")[1].split("```")[0].strip()
														
 
															+        elif "```" in text:
														
 
															+            text = text.split("```")[1].split("```")[0].strip()
														
 
															+            if text.startswith("json"):
														
 
															+                text = text[4:].strip()
														
 
															+
														
 
															+        return text
														
 
															+
														
 
															+    @staticmethod
														
 
															+    def _validate_response_structure(
														
 
															+        parsed: dict,
														
 
															+        mandatory_attrs: Dict[str, List[str]],
														
 
															+        extract_additional: bool
														
 
															+    ) -> dict:
														
 
															+        """Validate and fix the response structure."""
														
 
															+        expected_sections = ["mandatory"]
														
 
															+        if extract_additional:
														
 
															+            expected_sections.append("additional")
														
 
															+
														
 
															+        if not all(section in parsed for section in expected_sections):
														
 
															+            if isinstance(parsed, dict):
														
 
															+                mandatory_keys = set(mandatory_attrs.keys())
														
 
															+                mandatory = {k: v for k, v in parsed.items() if k in mandatory_keys}
														
 
															+                additional = {k: v for k, v in parsed.items() if k not in mandatory_keys}
														
 
															+
														
 
															+                result = {"mandatory": mandatory}
														
 
															+                if extract_additional:
														
 
															+                    result["additional"] = additional
														
 
															+                return result
														
 
															+            else:
														
 
															+                return ProductAttributeService._create_error_response(
														
 
															+                    "Invalid response structure",
														
 
															+                    mandatory_attrs,
														
 
															+                    extract_additional,
														
 
															+                    str(parsed)
														
 
															+                )
														
 
															+
														
 
															+        return parsed
														
 
															+
														
 
															+    @staticmethod
														
 
															+    def _create_error_response(
														
 
															+        error: str,
														
 
															+        mandatory_attrs: Dict[str, List[str]],
														
 
															+        extract_additional: bool,
														
 
															+        raw_output: Optional[str] = None
														
 
															+    ) -> dict:
														
 
															+        """Create a standardized error response."""
														
 
															+        response = {
														
 
															+            "mandatory": {attr: "Not Specified" for attr in mandatory_attrs.keys()},
														
 
															+            "error": error
														
 
															+        }
														
 
															+        if extract_additional:
														
 
															+            response["additional"] = {}
														
 
															+        if raw_output:
														
 
															+            response["raw_output"] = raw_output
														
 
															+        return response
														
 
															+
														
--- a/attr_extraction/tests.py
+++ b/attr_extraction/tests.py
@@ -0,0 +1,3 @@
 
															+from django.test import TestCase
														
 
															+
														
 
															+# Create your tests here.
														
--- a/attr_extraction/urls.py
+++ b/attr_extraction/urls.py
@@ -0,0 +1,9 @@
 
															+# ==================== urls.py ====================
														
 
															+from django.urls import path
														
 
															+from .views import ExtractProductAttributesView, BatchExtractProductAttributesView, ProductListView
														
 
															+
														
 
															+urlpatterns = [
														
 
															+    path('extract/', ExtractProductAttributesView.as_view(), name='extract-attributes'),
														
 
															+    path('batch-extract/', BatchExtractProductAttributesView.as_view(), name='batch-extract-attributes'),
														
 
															+    path('products/', ProductListView.as_view(), name='batch-extract-attributes'),
														
 
															+]
														
--- a/attr_extraction/views.py
+++ b/attr_extraction/views.py
@@ -0,0 +1,352 @@
 
															+# #  #==================== views.py ====================
														
 
															+# # from rest_framework.views import APIView
														
 
															+# # from rest_framework.response import Response
														
 
															+# # from rest_framework import status
														
 
															+# # from .serializers import (
														
 
															+# #     ProductAttributeRequestSerializer,
														
 
															+# #     ProductAttributeResponseSerializer
														
 
															+# # )
														
 
															+# # from .services import ProductAttributeService
														
 
															+
														
 
															+
														
 
															+# # class ExtractProductAttributesView(APIView):
														
 
															+# #     """
														
 
															+# #     API endpoint to extract product attributes using Groq LLM.
														
 
															+    
														
 
															+# #     POST /api/extract-attributes/
														
 
															+    
														
 
															+# #     Request Body:
														
 
															+# #     {
														
 
															+# #         "title": "Product title (optional)",
														
 
															+# #         "short_desc": "Short description (optional)",
														
 
															+# #         "long_desc": "Long description (optional)",
														
 
															+# #         "mandatory_attrs": {
														
 
															+# #             "Attribute1": ["value1", "value2", "value3"],
														
 
															+# #             "Attribute2": ["valueA", "valueB"]
														
 
															+# #         },
														
 
															+# #         "model": "llama-3.1-8b-instant (optional)",
														
 
															+# #         "extract_additional": true (optional, default: true)
														
 
															+# #     }
														
 
															+    
														
 
															+# #     Response:
														
 
															+# #     {
														
 
															+# #         "mandatory": {
														
 
															+# #             "Attribute1": "value1",
														
 
															+# #             "Attribute2": "valueA"
														
 
															+# #         },
														
 
															+# #         "additional": {
														
 
															+# #             "Color": "Blue",
														
 
															+# #             "Brand": "Example"
														
 
															+# #         }
														
 
															+# #     }
														
 
															+# #     """
														
 
															+
														
 
															+# #     def post(self, request):
														
 
															+# #         # Validate request data
														
 
															+# #         serializer = ProductAttributeRequestSerializer(data=request.data)
														
 
															+# #         if not serializer.is_valid():
														
 
															+# #             return Response(
														
 
															+# #                 {"error": serializer.errors},
														
 
															+# #                 status=status.HTTP_400_BAD_REQUEST
														
 
															+# #             )
														
 
															+
														
 
															+# #         validated_data = serializer.validated_data
														
 
															+
														
 
															+# #         # Combine product text
														
 
															+# #         product_text = ProductAttributeService.combine_product_text(
														
 
															+# #             title=validated_data.get('title'),
														
 
															+# #             short_desc=validated_data.get('short_desc'),
														
 
															+# #             long_desc=validated_data.get('long_desc')
														
 
															+# #         )
														
 
															+
														
 
															+# #         # Extract attributes
														
 
															+# #         result = ProductAttributeService.extract_attributes(
														
 
															+# #             product_text=product_text,
														
 
															+# #             mandatory_attrs=validated_data['mandatory_attrs'],
														
 
															+# #             model=validated_data.get('model'),
														
 
															+# #             extract_additional=validated_data.get('extract_additional', True)
														
 
															+# #         )
														
 
															+
														
 
															+# #         # Return response
														
 
															+# #         response_serializer = ProductAttributeResponseSerializer(data=result)
														
 
															+# #         if response_serializer.is_valid():
														
 
															+# #             return Response(response_serializer.data, status=status.HTTP_200_OK)
														
 
															+        
														
 
															+# #         return Response(result, status=status.HTTP_200_OK)
														
 
															+
														
 
															+
														
 
															+
														
 
															+
														
 
															+
														
 
															+
														
 
															+
														
 
															+# from rest_framework.views import APIView
														
 
															+# from rest_framework.response import Response
														
 
															+# from rest_framework import status
														
 
															+# from .serializers import (
														
 
															+#     SingleProductRequestSerializer,
														
 
															+#     BatchProductRequestSerializer,
														
 
															+#     ProductAttributeResultSerializer,
														
 
															+#     BatchProductResponseSerializer
														
 
															+# )
														
 
															+# from .services import ProductAttributeService
														
 
															+
														
 
															+
														
 
															+# class ExtractProductAttributesView(APIView):
														
 
															+#     """
														
 
															+#     API endpoint to extract product attributes for a single product.
														
 
															+    
														
 
															+#     POST /api/extract-attributes/
														
 
															+    
														
 
															+#     Request Body:
														
 
															+#     {
														
 
															+#         "title": "Product title (optional)",
														
 
															+#         "short_desc": "Short description (optional)",
														
 
															+#         "long_desc": "Long description (optional)",
														
 
															+#         "mandatory_attrs": {
														
 
															+#             "Attribute1": ["value1", "value2", "value3"],
														
 
															+#             "Attribute2": ["valueA", "valueB"]
														
 
															+#         },
														
 
															+#         "model": "llama-3.1-8b-instant (optional)",
														
 
															+#         "extract_additional": true (optional, default: true)
														
 
															+#     }
														
 
															+#     """
														
 
															+
														
 
															+#     def post(self, request):
														
 
															+#         serializer = SingleProductRequestSerializer(data=request.data)
														
 
															+#         if not serializer.is_valid():
														
 
															+#             return Response(
														
 
															+#                 {"error": serializer.errors},
														
 
															+#                 status=status.HTTP_400_BAD_REQUEST
														
 
															+#             )
														
 
															+
														
 
															+#         validated_data = serializer.validated_data
														
 
															+
														
 
															+#         product_text = ProductAttributeService.combine_product_text(
														
 
															+#             title=validated_data.get('title'),
														
 
															+#             short_desc=validated_data.get('short_desc'),
														
 
															+#             long_desc=validated_data.get('long_desc')
														
 
															+#         )
														
 
															+
														
 
															+#         result = ProductAttributeService.extract_attributes(
														
 
															+#             product_text=product_text,
														
 
															+#             mandatory_attrs=validated_data['mandatory_attrs'],
														
 
															+#             model=validated_data.get('model'),
														
 
															+#             extract_additional=validated_data.get('extract_additional', True)
														
 
															+#         )
														
 
															+
														
 
															+#         response_serializer = ProductAttributeResultSerializer(data=result)
														
 
															+#         if response_serializer.is_valid():
														
 
															+#             return Response(response_serializer.data, status=status.HTTP_200_OK)
														
 
															+        
														
 
															+#         return Response(result, status=status.HTTP_200_OK)
														
 
															+
														
 
															+
														
 
															+# class BatchExtractProductAttributesView(APIView):
														
 
															+#     """
														
 
															+#     API endpoint to extract product attributes for multiple products in batch.
														
 
															+    
														
 
															+#     POST /api/batch-extract-attributes/
														
 
															+    
														
 
															+#     Request Body:
														
 
															+#     {
														
 
															+#         "products": [
														
 
															+#             {
														
 
															+#                 "product_id": "prod_001",
														
 
															+#                 "title": "Product 1 title",
														
 
															+#                 "short_desc": "Short description",
														
 
															+#                 "long_desc": "Long description"
														
 
															+#             },
														
 
															+#             {
														
 
															+#                 "product_id": "prod_002",
														
 
															+#                 "title": "Product 2 title",
														
 
															+#                 "short_desc": "Short description"
														
 
															+#             }
														
 
															+#         ],
														
 
															+#         "mandatory_attrs": {
														
 
															+#             "Attribute1": ["value1", "value2", "value3"],
														
 
															+#             "Attribute2": ["valueA", "valueB"]
														
 
															+#         },
														
 
															+#         "model": "llama-3.1-8b-instant (optional)",
														
 
															+#         "extract_additional": true (optional, default: true)
														
 
															+#     }
														
 
															+    
														
 
															+#     Response:
														
 
															+#     {
														
 
															+#         "results": [
														
 
															+#             {
														
 
															+#                 "product_id": "prod_001",
														
 
															+#                 "mandatory": {...},
														
 
															+#                 "additional": {...}
														
 
															+#             },
														
 
															+#             {
														
 
															+#                 "product_id": "prod_002",
														
 
															+#                 "mandatory": {...},
														
 
															+#                 "additional": {...}
														
 
															+#             }
														
 
															+#         ],
														
 
															+#         "total_products": 2,
														
 
															+#         "successful": 2,
														
 
															+#         "failed": 0
														
 
															+#     }
														
 
															+#     """
														
 
															+
														
 
															+#     def post(self, request):
														
 
															+#         serializer = BatchProductRequestSerializer(data=request.data)
														
 
															+#         if not serializer.is_valid():
														
 
															+#             return Response(
														
 
															+#                 {"error": serializer.errors},
														
 
															+#                 status=status.HTTP_400_BAD_REQUEST
														
 
															+#             )
														
 
															+
														
 
															+#         validated_data = serializer.validated_data
														
 
															+
														
 
															+#         # Extract attributes for all products in batch
														
 
															+#         result = ProductAttributeService.extract_attributes_batch(
														
 
															+#             products=validated_data['products'],
														
 
															+#             mandatory_attrs=validated_data['mandatory_attrs'],
														
 
															+#             model=validated_data.get('model'),
														
 
															+#             extract_additional=validated_data.get('extract_additional', True)
														
 
															+#         )
														
 
															+
														
 
															+#         response_serializer = BatchProductResponseSerializer(data=result)
														
 
															+#         if response_serializer.is_valid():
														
 
															+#             return Response(response_serializer.data, status=status.HTTP_200_OK)
														
 
															+        
														
 
															+#         return Response(result, status=status.HTTP_200_OK)
														
 
															+
														
 
															+
														
 
															+
														
 
															+
														
 
															+
														
 
															+
														
 
															+
														
 
															+# ==================== views.py ====================
														
 
															+from rest_framework.views import APIView
														
 
															+from rest_framework.response import Response
														
 
															+from rest_framework import status
														
 
															+from .serializers import (
														
 
															+    SingleProductRequestSerializer,
														
 
															+    BatchProductRequestSerializer,
														
 
															+    ProductAttributeResultSerializer,
														
 
															+    BatchProductResponseSerializer
														
 
															+)
														
 
															+from .services import ProductAttributeService
														
 
															+from .ocr_service import OCRService
														
 
															+
														
 
															+
														
 
															+class ExtractProductAttributesView(APIView):
														
 
															+    """
														
 
															+    API endpoint to extract product attributes for a single product.
														
 
															+    Now supports image URL for OCR-based text extraction.
														
 
															+    """
														
 
															+
														
 
															+    def post(self, request):
														
 
															+        serializer = SingleProductRequestSerializer(data=request.data)
														
 
															+        if not serializer.is_valid():
														
 
															+            return Response(
														
 
															+                {"error": serializer.errors},
														
 
															+                status=status.HTTP_400_BAD_REQUEST
														
 
															+            )
														
 
															+
														
 
															+        validated_data = serializer.validated_data
														
 
															+        
														
 
															+        # Process image if URL provided
														
 
															+        ocr_results = None
														
 
															+        ocr_text = None
														
 
															+        
														
 
															+        if validated_data.get('process_image', True) and validated_data.get('image_url'):
														
 
															+            ocr_service = OCRService()
														
 
															+            ocr_results = ocr_service.process_image(validated_data['image_url'])
														
 
															+            
														
 
															+            # Extract attributes from OCR
														
 
															+            if ocr_results and ocr_results.get('detected_text'):
														
 
															+                ocr_attrs = ProductAttributeService.extract_attributes_from_ocr(
														
 
															+                    ocr_results,
														
 
															+                    validated_data.get('model')
														
 
															+                )
														
 
															+                ocr_results['extracted_attributes'] = ocr_attrs
														
 
															+                
														
 
															+                # Format OCR text
														
 
															+                ocr_text = "\n".join([
														
 
															+                    f"{item['text']} (confidence: {item['confidence']:.2f})"
														
 
															+                    for item in ocr_results['detected_text']
														
 
															+                ])
														
 
															+
														
 
															+        # Combine all product information
														
 
															+        product_text = ProductAttributeService.combine_product_text(
														
 
															+            title=validated_data.get('title'),
														
 
															+            short_desc=validated_data.get('short_desc'),
														
 
															+            long_desc=validated_data.get('long_desc'),
														
 
															+            ocr_text=ocr_text
														
 
															+        )
														
 
															+
														
 
															+        # Extract attributes
														
 
															+        result = ProductAttributeService.extract_attributes(
														
 
															+            product_text=product_text,
														
 
															+            mandatory_attrs=validated_data['mandatory_attrs'],
														
 
															+            model=validated_data.get('model'),
														
 
															+            extract_additional=validated_data.get('extract_additional', True)
														
 
															+        )
														
 
															+        
														
 
															+        # Add OCR results if available
														
 
															+        if ocr_results:
														
 
															+            result['ocr_results'] = ocr_results
														
 
															+
														
 
															+        response_serializer = ProductAttributeResultSerializer(data=result)
														
 
															+        if response_serializer.is_valid():
														
 
															+            return Response(response_serializer.data, status=status.HTTP_200_OK)
														
 
															+        
														
 
															+        return Response(result, status=status.HTTP_200_OK)
														
 
															+
														
 
															+
														
 
															+class BatchExtractProductAttributesView(APIView):
														
 
															+    """
														
 
															+    API endpoint to extract product attributes for multiple products in batch.
														
 
															+    Now supports image URLs for OCR-based text extraction.
														
 
															+    """
														
 
															+
														
 
															+    def post(self, request):
														
 
															+        serializer = BatchProductRequestSerializer(data=request.data)
														
 
															+        if not serializer.is_valid():
														
 
															+            return Response(
														
 
															+                {"error": serializer.errors},
														
 
															+                status=status.HTTP_400_BAD_REQUEST
														
 
															+            )
														
 
															+
														
 
															+        validated_data = serializer.validated_data
														
 
															+
														
 
															+        # Extract attributes for all products in batch
														
 
															+        result = ProductAttributeService.extract_attributes_batch(
														
 
															+            products=validated_data['products'],
														
 
															+            mandatory_attrs=validated_data['mandatory_attrs'],
														
 
															+            model=validated_data.get('model'),
														
 
															+            extract_additional=validated_data.get('extract_additional', True),
														
 
															+            process_image=validated_data.get('process_image', True)
														
 
															+        )
														
 
															+
														
 
															+        response_serializer = BatchProductResponseSerializer(data=result)
														
 
															+        if response_serializer.is_valid():
														
 
															+            return Response(response_serializer.data, status=status.HTTP_200_OK)
														
 
															+        
														
 
															+        return Response(result, status=status.HTTP_200_OK)
														
 
															+
														
 
															+
														
 
															+
														
 
															+
														
 
															+
														
 
															+from rest_framework.views import APIView
														
 
															+from rest_framework.response import Response
														
 
															+from rest_framework import status
														
 
															+from .models import Product
														
 
															+from .serializers import ProductSerializer
														
 
															+
														
 
															+class ProductListView(APIView):
														
 
															+    """
														
 
															+    GET API to list all products with details
														
 
															+    """
														
 
															+    def get(self, request):
														
 
															+        products = Product.objects.all()
														
 
															+        serializer = ProductSerializer(products, many=True)
														
 
															+        return Response(serializer.data, status=status.HTTP_200_OK)
														
 
															+
														
--- a/content_quality_tool/__pycache__/settings.cpython-313.pyc
+++ b/content_quality_tool/__pycache__/settings.cpython-313.pyc
--- a/content_quality_tool/__pycache__/urls.cpython-313.pyc
+++ b/content_quality_tool/__pycache__/urls.cpython-313.pyc
--- a/content_quality_tool/settings.py
+++ b/content_quality_tool/settings.py
@@ -29,6 +29,7 @@ INSTALLED_APPS = [
 
															     'django.contrib.staticfiles',
														
 
															     'core',
														
 
															     'rest_framework',
														
 
															+    'attr_extraction',
														
 
															 ]
														
 
															 MIDDLEWARE = [
														
 
															     'django.middleware.security.SecurityMiddleware',
														
@@ -118,3 +119,7 @@ MESSAGE_TAGS = {
 
															+GROQ_API_KEY = "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"
														
 
															+GROQ_API_URL = "https://api.groq.com/openai/v1/chat/completions"
														
 
															+SUPPORTED_MODELS = ["llama-3.1-8b-instant", "llama-3.3-70b-versatile", "mixtral-8x7b-32768"]
														
 
															+MAX_BATCH_SIZE = 100  # Maximum products per batch request
														
--- a/content_quality_tool/urls.py
+++ b/content_quality_tool/urls.py
@@ -29,6 +29,7 @@ urlpatterns = [
 
															     # api url
														
 
															     path("core/", include("core.urls")),
														
 
															+    path("attr/", include("attr_extraction.urls")),
														
 
															     # path("", views.login_view, name="login_view"),
														
 
															 ]
														
--- a/core/__pycache__/models.cpython-313.pyc
+++ b/core/__pycache__/models.cpython-313.pyc
--- a/core/services/__pycache__/attribute_scorer.cpython-313.pyc
+++ b/core/services/__pycache__/attribute_scorer.cpython-313.pyc
--- a/db.sqlite3
+++ b/db.sqlite3