пре 3 месеци · ae7867812b
--- a/attr_extraction/__init__.py
+++ b/attr_extraction/__init__.py
--- a/attr_extraction/admin.py
+++ b/attr_extraction/admin.py
@@ -0,0 +1,3 @@
 
				+from django.contrib import admin
			
 
				+
			
 
				+# Register your models here.
			
--- a/attr_extraction/apps.py
+++ b/attr_extraction/apps.py
@@ -0,0 +1,6 @@
 
				+from django.apps import AppConfig
			
 
				+
			
 
				+
			
 
				+class AttrExtractionConfig(AppConfig):
			
 
				+    default_auto_field = 'django.db.models.BigAutoField'
			
 
				+    name = 'attr_extraction'
			
--- a/attr_extraction/migrations/0001_initial.py
+++ b/attr_extraction/migrations/0001_initial.py
@@ -0,0 +1,27 @@
 
				+# Generated by Django 5.2.7 on 2025-10-17 10:21
			
 
				+
			
 
				+from django.db import migrations, models
			
 
				+
			
 
				+
			
 
				+class Migration(migrations.Migration):
			
 
				+
			
 
				+    initial = True
			
 
				+
			
 
				+    dependencies = [
			
 
				+    ]
			
 
				+
			
 
				+    operations = [
			
 
				+        migrations.CreateModel(
			
 
				+            name='Product',
			
 
				+            fields=[
			
 
				+                ('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
			
 
				+                ('item_id', models.CharField(max_length=100, unique=True)),
			
 
				+                ('product_name', models.CharField(max_length=255)),
			
 
				+                ('product_long_description', models.TextField(blank=True, null=True)),
			
 
				+                ('product_short_description', models.TextField(blank=True, null=True)),
			
 
				+                ('product_type', models.CharField(blank=True, max_length=100, null=True)),
			
 
				+                ('image_path', models.CharField(blank=True, max_length=500, null=True)),
			
 
				+                ('image', models.ImageField(blank=True, null=True, upload_to='products/')),
			
 
				+            ],
			
 
				+        ),
			
 
				+    ]
			
--- a/attr_extraction/migrations/__init__.py
+++ b/attr_extraction/migrations/__init__.py
--- a/attr_extraction/models.py
+++ b/attr_extraction/models.py
@@ -0,0 +1,16 @@
 
				+from django.db import models
			
 
				+
			
 
				+class Product(models.Model):
			
 
				+    """
			
 
				+    Stores product details
			
 
				+    """
			
 
				+    item_id = models.CharField(max_length=100, unique=True)
			
 
				+    product_name = models.CharField(max_length=255)
			
 
				+    product_long_description = models.TextField(blank=True, null=True)
			
 
				+    product_short_description = models.TextField(blank=True, null=True)
			
 
				+    product_type = models.CharField(max_length=100, blank=True, null=True)
			
 
				+    image_path = models.CharField(max_length=500, blank=True, null=True)
			
 
				+    image = models.ImageField(upload_to='products/', blank=True, null=True)
			
 
				+
			
 
				+    def __str__(self):
			
 
				+        return f"{self.product_name} ({self.item_id})"
			
--- a/attr_extraction/ocr_service.py
+++ b/attr_extraction/ocr_service.py
@@ -0,0 +1,151 @@
 
				+# ==================== ocr_service.py ====================
			
 
				+import cv2
			
 
				+import easyocr
			
 
				+import numpy as np
			
 
				+import re
			
 
				+import requests
			
 
				+from io import BytesIO
			
 
				+from PIL import Image
			
 
				+from typing import List, Tuple, Dict, Optional
			
 
				+import logging
			
 
				+
			
 
				+logger = logging.getLogger(__name__)
			
 
				+
			
 
				+
			
 
				+class OCRService:
			
 
				+    """Service for extracting text from product images using OCR."""
			
 
				+    
			
 
				+    def __init__(self):
			
 
				+        self.reader = None
			
 
				+    
			
 
				+    def _get_reader(self):
			
 
				+        """Lazy load EasyOCR reader."""
			
 
				+        if self.reader is None:
			
 
				+            self.reader = easyocr.Reader(['en'], gpu=False)
			
 
				+        return self.reader
			
 
				+    
			
 
				+    def download_image(self, image_url: str) -> Optional[np.ndarray]:
			
 
				+        """Download image from URL and convert to OpenCV format."""
			
 
				+        try:
			
 
				+            response = requests.get(image_url, timeout=10)
			
 
				+            response.raise_for_status()
			
 
				+            
			
 
				+            # Convert to PIL Image then to OpenCV format
			
 
				+            pil_image = Image.open(BytesIO(response.content))
			
 
				+            image = cv2.cvtColor(np.array(pil_image), cv2.COLOR_RGB2BGR)
			
 
				+            return image
			
 
				+        except Exception as e:
			
 
				+            logger.error(f"Error downloading image from {image_url}: {str(e)}")
			
 
				+            return None
			
 
				+    
			
 
				+    def preprocess_horizontal(self, image: np.ndarray) -> np.ndarray:
			
 
				+        """Preprocess image for horizontal text."""
			
 
				+        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
			
 
				+        enhanced = cv2.GaussianBlur(gray, (5, 5), 0)
			
 
				+        _, binary = cv2.threshold(enhanced, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
			
 
				+        return binary
			
 
				+    
			
 
				+    def preprocess_vertical(self, image: np.ndarray) -> np.ndarray:
			
 
				+        """Preprocess image for vertical text."""
			
 
				+        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
			
 
				+        enhanced = cv2.equalizeHist(gray)
			
 
				+        thresh = cv2.adaptiveThreshold(
			
 
				+            enhanced, 255, cv2.ADAPTIVE_THRESH_MEAN_C, cv2.THRESH_BINARY_INV, 15, 10
			
 
				+        )
			
 
				+        return thresh
			
 
				+    
			
 
				+    def detect_text_regions(self, image: np.ndarray, preprocess_func) -> List[Tuple]:
			
 
				+        """Detect text regions using contours."""
			
 
				+        processed = preprocess_func(image)
			
 
				+        contours, _ = cv2.findContours(processed, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
			
 
				+        text_regions = []
			
 
				+        for contour in contours:
			
 
				+            x, y, w, h = cv2.boundingRect(contour)
			
 
				+            if w > 30 and h > 30:  # Filter small regions
			
 
				+                aspect_ratio = h / w
			
 
				+                text_regions.append((x, y, w, h, aspect_ratio))
			
 
				+        return text_regions
			
 
				+    
			
 
				+    def classify_and_extract_text(self, image: np.ndarray, regions: List[Tuple]) -> List[Tuple]:
			
 
				+        """Classify regions as horizontal or vertical and extract text."""
			
 
				+        reader = self._get_reader()
			
 
				+        all_detected_text = []
			
 
				+        
			
 
				+        for (x, y, w, h, aspect_ratio) in regions:
			
 
				+            roi = image[y:y + h, x:x + w]
			
 
				+            if aspect_ratio > 1.5:  # Vertical text
			
 
				+                roi = cv2.rotate(roi, cv2.ROTATE_90_CLOCKWISE)
			
 
				+            
			
 
				+            results = reader.readtext(roi, detail=1)
			
 
				+            for _, text, confidence in results:
			
 
				+                all_detected_text.append((text, confidence))
			
 
				+        
			
 
				+        return all_detected_text
			
 
				+    
			
 
				+    def clean_ocr_output(self, ocr_results: List[Tuple], confidence_threshold: float = 0.40) -> List[Tuple]:
			
 
				+        """Clean OCR results by removing unwanted characters and low-confidence detections."""
			
 
				+        cleaned_results = []
			
 
				+        for text, confidence in ocr_results:
			
 
				+            if confidence < confidence_threshold:
			
 
				+                continue
			
 
				+            
			
 
				+            # Remove unwanted characters using regex
			
 
				+            cleaned_text = re.sub(r"[^A-Za-z0-9\s\.\,\(\)\-\%\/]", "", text)
			
 
				+            cleaned_text = re.sub(r"\s+", " ", cleaned_text).strip()
			
 
				+            
			
 
				+            # Remove unwanted numeric characters like single digits
			
 
				+            if len(cleaned_text) == 1 and cleaned_text.isdigit():
			
 
				+                continue
			
 
				+            
			
 
				+            if any(char.isdigit() for char in cleaned_text) and len(cleaned_text) < 2:
			
 
				+                continue
			
 
				+            
			
 
				+            if len(cleaned_text.strip()) > 0:
			
 
				+                cleaned_results.append((cleaned_text.strip(), confidence))
			
 
				+        
			
 
				+        return cleaned_results
			
 
				+    
			
 
				+    def process_image(self, image_url: str) -> Dict:
			
 
				+        """Main method to process image and extract text."""
			
 
				+        try:
			
 
				+            # Download image
			
 
				+            image = self.download_image(image_url)
			
 
				+            if image is None:
			
 
				+                return {
			
 
				+                    "detected_text": [],
			
 
				+                    "extracted_attributes": {},
			
 
				+                    "error": "Failed to download image"
			
 
				+                }
			
 
				+            
			
 
				+            # Detect and process horizontal text
			
 
				+            horizontal_regions = self.detect_text_regions(image, self.preprocess_horizontal)
			
 
				+            horizontal_text = self.classify_and_extract_text(image, horizontal_regions)
			
 
				+            
			
 
				+            # Detect and process vertical text
			
 
				+            vertical_regions = self.detect_text_regions(image, self.preprocess_vertical)
			
 
				+            vertical_text = self.classify_and_extract_text(image, vertical_regions)
			
 
				+            
			
 
				+            # Combine results
			
 
				+            all_text = horizontal_text + vertical_text
			
 
				+            
			
 
				+            # Clean results
			
 
				+            cleaned_results = self.clean_ocr_output(all_text, confidence_threshold=0.40)
			
 
				+            
			
 
				+            # Format for response
			
 
				+            detected_text = [
			
 
				+                {"text": text, "confidence": float(confidence)} 
			
 
				+                for text, confidence in cleaned_results
			
 
				+            ]
			
 
				+            
			
 
				+            return {
			
 
				+                "detected_text": detected_text,
			
 
				+                "extracted_attributes": {}
			
 
				+            }
			
 
				+            
			
 
				+        except Exception as e:
			
 
				+            logger.error(f"Error processing image: {str(e)}")
			
 
				+            return {
			
 
				+                "detected_text": [],
			
 
				+                "extracted_attributes": {},
			
 
				+                "error": str(e)
			
 
				+            }
			
--- a/attr_extraction/serializers.py
+++ b/attr_extraction/serializers.py
@@ -0,0 +1,196 @@
 
				+# from rest_framework import serializers
			
 
				+
			
 
				+# class ProductInputSerializer(serializers.Serializer):
			
 
				+#     """Serializer for individual product input."""
			
 
				+#     product_id = serializers.CharField(required=False, allow_blank=True, allow_null=True)
			
 
				+#     title = serializers.CharField(required=False, allow_blank=True, allow_null=True)
			
 
				+#     short_desc = serializers.CharField(required=False, allow_blank=True, allow_null=True)
			
 
				+#     long_desc = serializers.CharField(required=False, allow_blank=True, allow_null=True)
			
 
				+
			
 
				+
			
 
				+# class SingleProductRequestSerializer(serializers.Serializer):
			
 
				+#     """Serializer for single product extraction request."""
			
 
				+#     title = serializers.CharField(required=False, allow_blank=True, allow_null=True)
			
 
				+#     short_desc = serializers.CharField(required=False, allow_blank=True, allow_null=True)
			
 
				+#     long_desc = serializers.CharField(required=False, allow_blank=True, allow_null=True)
			
 
				+#     mandatory_attrs = serializers.DictField(
			
 
				+#         child=serializers.ListField(child=serializers.CharField()),
			
 
				+#         required=True
			
 
				+#     )
			
 
				+#     model = serializers.CharField(required=False, default="llama-3.1-8b-instant")
			
 
				+#     extract_additional = serializers.BooleanField(required=False, default=True)
			
 
				+
			
 
				+#     def validate_model(self, value):
			
 
				+#         from django.conf import settings
			
 
				+#         if value not in settings.SUPPORTED_MODELS:
			
 
				+#             raise serializers.ValidationError(
			
 
				+#                 f"Model must be one of {settings.SUPPORTED_MODELS}"
			
 
				+#             )
			
 
				+#         return value
			
 
				+
			
 
				+
			
 
				+# class BatchProductRequestSerializer(serializers.Serializer):
			
 
				+#     """Serializer for batch product extraction request."""
			
 
				+#     products = serializers.ListField(
			
 
				+#         child=ProductInputSerializer(),
			
 
				+#         required=True,
			
 
				+#         min_length=1
			
 
				+#     )
			
 
				+#     mandatory_attrs = serializers.DictField(
			
 
				+#         child=serializers.ListField(child=serializers.CharField()),
			
 
				+#         required=True
			
 
				+#     )
			
 
				+#     model = serializers.CharField(required=False, default="llama-3.1-8b-instant")
			
 
				+#     extract_additional = serializers.BooleanField(required=False, default=True)
			
 
				+
			
 
				+#     def validate_model(self, value):
			
 
				+#         from django.conf import settings
			
 
				+#         if value not in settings.SUPPORTED_MODELS:
			
 
				+#             raise serializers.ValidationError(
			
 
				+#                 f"Model must be one of {settings.SUPPORTED_MODELS}"
			
 
				+#             )
			
 
				+#         return value
			
 
				+
			
 
				+#     def validate_products(self, value):
			
 
				+#         from django.conf import settings
			
 
				+#         max_size = getattr(settings, 'MAX_BATCH_SIZE', 100)
			
 
				+#         if len(value) > max_size:
			
 
				+#             raise serializers.ValidationError(
			
 
				+#                 f"Batch size cannot exceed {max_size} products"
			
 
				+#             )
			
 
				+#         return value
			
 
				+
			
 
				+
			
 
				+# class ProductAttributeResultSerializer(serializers.Serializer):
			
 
				+#     """Serializer for individual product extraction result."""
			
 
				+#     product_id = serializers.CharField(required=False)
			
 
				+#     mandatory = serializers.DictField()
			
 
				+#     additional = serializers.DictField(required=False)
			
 
				+#     error = serializers.CharField(required=False)
			
 
				+#     raw_output = serializers.CharField(required=False)
			
 
				+
			
 
				+
			
 
				+# class BatchProductResponseSerializer(serializers.Serializer):
			
 
				+#     """Serializer for batch extraction response."""
			
 
				+#     results = serializers.ListField(child=ProductAttributeResultSerializer())
			
 
				+#     total_products = serializers.IntegerField()
			
 
				+#     successful = serializers.IntegerField()
			
 
				+#     failed = serializers.IntegerField()
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+# ==================== serializers.py ====================
			
 
				+from rest_framework import serializers
			
 
				+
			
 
				+class ProductInputSerializer(serializers.Serializer):
			
 
				+    """Serializer for individual product input."""
			
 
				+    product_id = serializers.CharField(required=False, allow_blank=True, allow_null=True)
			
 
				+    title = serializers.CharField(required=False, allow_blank=True, allow_null=True)
			
 
				+    short_desc = serializers.CharField(required=False, allow_blank=True, allow_null=True)
			
 
				+    long_desc = serializers.CharField(required=False, allow_blank=True, allow_null=True)
			
 
				+    image_url = serializers.URLField(required=False, allow_blank=True, allow_null=True)
			
 
				+
			
 
				+
			
 
				+class SingleProductRequestSerializer(serializers.Serializer):
			
 
				+    """Serializer for single product extraction request."""
			
 
				+    title = serializers.CharField(required=False, allow_blank=True, allow_null=True)
			
 
				+    short_desc = serializers.CharField(required=False, allow_blank=True, allow_null=True)
			
 
				+    long_desc = serializers.CharField(required=False, allow_blank=True, allow_null=True)
			
 
				+    image_url = serializers.URLField(required=False, allow_blank=True, allow_null=True)
			
 
				+    mandatory_attrs = serializers.DictField(
			
 
				+        child=serializers.ListField(child=serializers.CharField()),
			
 
				+        required=True
			
 
				+    )
			
 
				+    model = serializers.CharField(required=False, default="llama-3.1-8b-instant")
			
 
				+    extract_additional = serializers.BooleanField(required=False, default=True)
			
 
				+    process_image = serializers.BooleanField(required=False, default=True)
			
 
				+
			
 
				+    def validate_model(self, value):
			
 
				+        from django.conf import settings
			
 
				+        if value not in settings.SUPPORTED_MODELS:
			
 
				+            raise serializers.ValidationError(
			
 
				+                f"Model must be one of {settings.SUPPORTED_MODELS}"
			
 
				+            )
			
 
				+        return value
			
 
				+
			
 
				+
			
 
				+class BatchProductRequestSerializer(serializers.Serializer):
			
 
				+    """Serializer for batch product extraction request."""
			
 
				+    products = serializers.ListField(
			
 
				+        child=ProductInputSerializer(),
			
 
				+        required=True,
			
 
				+        min_length=1
			
 
				+    )
			
 
				+    mandatory_attrs = serializers.DictField(
			
 
				+        child=serializers.ListField(child=serializers.CharField()),
			
 
				+        required=True
			
 
				+    )
			
 
				+    model = serializers.CharField(required=False, default="llama-3.1-8b-instant")
			
 
				+    extract_additional = serializers.BooleanField(required=False, default=True)
			
 
				+    process_image = serializers.BooleanField(required=False, default=True)
			
 
				+
			
 
				+    def validate_model(self, value):
			
 
				+        from django.conf import settings
			
 
				+        if value not in settings.SUPPORTED_MODELS:
			
 
				+            raise serializers.ValidationError(
			
 
				+                f"Model must be one of {settings.SUPPORTED_MODELS}"
			
 
				+            )
			
 
				+        return value
			
 
				+
			
 
				+    def validate_products(self, value):
			
 
				+        from django.conf import settings
			
 
				+        max_size = getattr(settings, 'MAX_BATCH_SIZE', 100)
			
 
				+        if len(value) > max_size:
			
 
				+            raise serializers.ValidationError(
			
 
				+                f"Batch size cannot exceed {max_size} products"
			
 
				+            )
			
 
				+        return value
			
 
				+
			
 
				+
			
 
				+class OCRResultSerializer(serializers.Serializer):
			
 
				+    """Serializer for OCR results."""
			
 
				+    detected_text = serializers.ListField(child=serializers.DictField())
			
 
				+    extracted_attributes = serializers.DictField()
			
 
				+
			
 
				+
			
 
				+class ProductAttributeResultSerializer(serializers.Serializer):
			
 
				+    """Serializer for individual product extraction result."""
			
 
				+    product_id = serializers.CharField(required=False)
			
 
				+    mandatory = serializers.DictField()
			
 
				+    additional = serializers.DictField(required=False)
			
 
				+    ocr_results = OCRResultSerializer(required=False)
			
 
				+    error = serializers.CharField(required=False)
			
 
				+    raw_output = serializers.CharField(required=False)
			
 
				+
			
 
				+
			
 
				+class BatchProductResponseSerializer(serializers.Serializer):
			
 
				+    """Serializer for batch extraction response."""
			
 
				+    results = serializers.ListField(child=ProductAttributeResultSerializer())
			
 
				+    total_products = serializers.IntegerField()
			
 
				+    successful = serializers.IntegerField()
			
 
				+    failed = serializers.IntegerField()
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+from rest_framework import serializers
			
 
				+from .models import Product
			
 
				+
			
 
				+class ProductSerializer(serializers.ModelSerializer):
			
 
				+    class Meta:
			
 
				+        model = Product
			
 
				+        fields = [
			
 
				+            'id',
			
 
				+            'item_id',
			
 
				+            'product_name',
			
 
				+            'product_long_description',
			
 
				+            'product_short_description',
			
 
				+            'product_type',
			
 
				+            'image_path',
			
 
				+            'image',
			
 
				+        ]
			
--- a/attr_extraction/services.py
+++ b/attr_extraction/services.py
@@ -0,0 +1,942 @@
 
				+# # import requests
			
 
				+# # import json
			
 
				+# # from typing import Dict, List, Optional
			
 
				+# # from django.conf import settings
			
 
				+
			
 
				+
			
 
				+# # class ProductAttributeService:
			
 
				+# #     """Service class for extracting product attributes using Groq LLM."""
			
 
				+
			
 
				+# #     @staticmethod
			
 
				+# #     def combine_product_text(
			
 
				+# #         title: Optional[str] = None,
			
 
				+# #         short_desc: Optional[str] = None,
			
 
				+# #         long_desc: Optional[str] = None
			
 
				+# #     ) -> str:
			
 
				+# #         """Combine product metadata into a single text block."""
			
 
				+# #         parts = []
			
 
				+# #         if title:
			
 
				+# #             parts.append(str(title).strip())
			
 
				+# #         if short_desc:
			
 
				+# #             parts.append(str(short_desc).strip())
			
 
				+# #         if long_desc:
			
 
				+# #             parts.append(str(long_desc).strip())
			
 
				+
			
 
				+# #         combined = " ".join(parts).strip()
			
 
				+
			
 
				+# #         if not combined:
			
 
				+# #             return "No product information available"
			
 
				+
			
 
				+# #         return combined
			
 
				+
			
 
				+# #     @staticmethod
			
 
				+# #     def extract_attributes(
			
 
				+# #         product_text: str,
			
 
				+# #         mandatory_attrs: Dict[str, List[str]],
			
 
				+# #         model: str = None,
			
 
				+# #         extract_additional: bool = True
			
 
				+# #     ) -> dict:
			
 
				+# #         """Use Groq LLM to extract attributes from any product type."""
			
 
				+        
			
 
				+# #         if model is None:
			
 
				+# #             model = settings.SUPPORTED_MODELS[0]
			
 
				+
			
 
				+# #         # Check if product text is empty or minimal
			
 
				+# #         if not product_text or product_text == "No product information available":
			
 
				+# #             return ProductAttributeService._create_error_response(
			
 
				+# #                 "No product information provided",
			
 
				+# #                 mandatory_attrs,
			
 
				+# #                 extract_additional
			
 
				+# #             )
			
 
				+
			
 
				+# #         # Create structured prompt for mandatory attributes
			
 
				+# #         mandatory_attr_list = []
			
 
				+# #         for attr_name, allowed_values in mandatory_attrs.items():
			
 
				+# #             mandatory_attr_list.append(f"{attr_name}: {', '.join(allowed_values)}")
			
 
				+# #         mandatory_attr_text = "\n".join(mandatory_attr_list)
			
 
				+
			
 
				+# #         additional_instruction = ""
			
 
				+# #         if extract_additional:
			
 
				+# #             additional_instruction = """
			
 
				+# # 2. Extract ADDITIONAL attributes: Identify any other relevant attributes from the product text 
			
 
				+# #    (such as Material, Size, Color, Brand, Dimensions, Weight, Features, Specifications, etc.) 
			
 
				+# #    and their values. Extract attributes that are specific and relevant to this product type."""
			
 
				+
			
 
				+# #         output_format = {
			
 
				+# #             "mandatory": {attr: "value" for attr in mandatory_attrs.keys()},
			
 
				+# #             "additional": {} if extract_additional else None
			
 
				+# #         }
			
 
				+
			
 
				+# #         if not extract_additional:
			
 
				+# #             output_format.pop("additional")
			
 
				+
			
 
				+# #         prompt = f"""
			
 
				+# # You are an intelligent product attribute extractor that works with ANY product type.
			
 
				+
			
 
				+# # TASK:
			
 
				+# # 1. Extract MANDATORY attributes: For each mandatory attribute, select the most appropriate value 
			
 
				+# #    from the provided list. Choose the value that best matches the product description.
			
 
				+# # {additional_instruction}
			
 
				+
			
 
				+# # Product Text:
			
 
				+# # {product_text}
			
 
				+
			
 
				+# # Mandatory Attribute Lists (MUST select one value for each):
			
 
				+# # {mandatory_attr_text}
			
 
				+
			
 
				+# # CRITICAL INSTRUCTIONS:
			
 
				+# # - Return ONLY valid JSON, nothing else
			
 
				+# # - No explanations, no markdown, no text before or after the JSON
			
 
				+# # - For mandatory attributes, choose EXACTLY ONE value from the provided list that best matches
			
 
				+# # - If a mandatory attribute cannot be determined from the product text, use "Not Specified"
			
 
				+# # - Work with whatever information is available - the product text may be incomplete (only title, or only description, etc.)
			
 
				+# # {f"- For additional attributes, extract any relevant information found in the product text" if extract_additional else ""}
			
 
				+# # - Be precise and only extract information that is explicitly stated or clearly implied
			
 
				+
			
 
				+# # Required Output Format (ONLY THIS, NO OTHER TEXT):
			
 
				+# # {json.dumps(output_format, indent=2)}
			
 
				+# #         """
			
 
				+
			
 
				+# #         payload = {
			
 
				+# #             "model": model,
			
 
				+# #             "messages": [
			
 
				+# #                 {
			
 
				+# #                     "role": "system",
			
 
				+# #                     "content": f"You are a precise attribute extraction model. Return ONLY valid JSON with {'mandatory and additional' if extract_additional else 'mandatory'} sections. No explanations, no markdown, no other text."
			
 
				+# #                 },
			
 
				+# #                 {"role": "user", "content": prompt}
			
 
				+# #             ],
			
 
				+# #             "temperature": 0.0,
			
 
				+# #             "max_tokens": 1500
			
 
				+# #         }
			
 
				+
			
 
				+# #         headers = {
			
 
				+# #             "Authorization": f"Bearer {settings.GROQ_API_KEY}",
			
 
				+# #             "Content-Type": "application/json",
			
 
				+# #         }
			
 
				+
			
 
				+# #         try:
			
 
				+# #             response = requests.post(
			
 
				+# #                 settings.GROQ_API_URL,
			
 
				+# #                 headers=headers,
			
 
				+# #                 json=payload,
			
 
				+# #                 timeout=30
			
 
				+# #             )
			
 
				+# #             response.raise_for_status()
			
 
				+# #             result_text = response.json()["choices"][0]["message"]["content"].strip()
			
 
				+
			
 
				+# #             # Clean the response
			
 
				+# #             result_text = ProductAttributeService._clean_json_response(result_text)
			
 
				+
			
 
				+# #             # Parse JSON
			
 
				+# #             parsed = json.loads(result_text)
			
 
				+
			
 
				+# #             # Validate and restructure if needed
			
 
				+# #             parsed = ProductAttributeService._validate_response_structure(
			
 
				+# #                 parsed, mandatory_attrs, extract_additional
			
 
				+# #             )
			
 
				+
			
 
				+# #             return parsed
			
 
				+
			
 
				+# #         except requests.exceptions.RequestException as e:
			
 
				+# #             return ProductAttributeService._create_error_response(
			
 
				+# #                 str(e), mandatory_attrs, extract_additional
			
 
				+# #             )
			
 
				+# #         except json.JSONDecodeError as e:
			
 
				+# #             return ProductAttributeService._create_error_response(
			
 
				+# #                 f"Invalid JSON: {str(e)}", mandatory_attrs, extract_additional, result_text
			
 
				+# #             )
			
 
				+# #         except Exception as e:
			
 
				+# #             return ProductAttributeService._create_error_response(
			
 
				+# #                 str(e), mandatory_attrs, extract_additional
			
 
				+# #             )
			
 
				+
			
 
				+# #     @staticmethod
			
 
				+# #     def _clean_json_response(text: str) -> str:
			
 
				+# #         """Clean LLM response to extract valid JSON."""
			
 
				+# #         start_idx = text.find('{')
			
 
				+# #         end_idx = text.rfind('}')
			
 
				+
			
 
				+# #         if start_idx != -1 and end_idx != -1:
			
 
				+# #             text = text[start_idx:end_idx + 1]
			
 
				+
			
 
				+# #         if "```json" in text:
			
 
				+# #             text = text.split("```json")[1].split("```")[0].strip()
			
 
				+# #         elif "```" in text:
			
 
				+# #             text = text.split("```")[1].split("```")[0].strip()
			
 
				+# #             if text.startswith("json"):
			
 
				+# #                 text = text[4:].strip()
			
 
				+
			
 
				+# #         return text
			
 
				+
			
 
				+# #     @staticmethod
			
 
				+# #     def _validate_response_structure(
			
 
				+# #         parsed: dict,
			
 
				+# #         mandatory_attrs: Dict[str, List[str]],
			
 
				+# #         extract_additional: bool
			
 
				+# #     ) -> dict:
			
 
				+# #         """Validate and fix the response structure."""
			
 
				+# #         expected_sections = ["mandatory"]
			
 
				+# #         if extract_additional:
			
 
				+# #             expected_sections.append("additional")
			
 
				+
			
 
				+# #         if not all(section in parsed for section in expected_sections):
			
 
				+# #             if isinstance(parsed, dict):
			
 
				+# #                 mandatory_keys = set(mandatory_attrs.keys())
			
 
				+# #                 mandatory = {k: v for k, v in parsed.items() if k in mandatory_keys}
			
 
				+# #                 additional = {k: v for k, v in parsed.items() if k not in mandatory_keys}
			
 
				+
			
 
				+# #                 result = {"mandatory": mandatory}
			
 
				+# #                 if extract_additional:
			
 
				+# #                     result["additional"] = additional
			
 
				+# #                 return result
			
 
				+# #             else:
			
 
				+# #                 return ProductAttributeService._create_error_response(
			
 
				+# #                     "Invalid response structure",
			
 
				+# #                     mandatory_attrs,
			
 
				+# #                     extract_additional,
			
 
				+# #                     str(parsed)
			
 
				+# #                 )
			
 
				+
			
 
				+# #         return parsed
			
 
				+
			
 
				+# #     @staticmethod
			
 
				+# #     def _create_error_response(
			
 
				+# #         error: str,
			
 
				+# #         mandatory_attrs: Dict[str, List[str]],
			
 
				+# #         extract_additional: bool,
			
 
				+# #         raw_output: Optional[str] = None
			
 
				+# #     ) -> dict:
			
 
				+# #         """Create a standardized error response."""
			
 
				+# #         response = {
			
 
				+# #             "mandatory": {attr: "Not Specified" for attr in mandatory_attrs.keys()},
			
 
				+# #             "error": error
			
 
				+# #         }
			
 
				+# #         if extract_additional:
			
 
				+# #             response["additional"] = {}
			
 
				+# #         if raw_output:
			
 
				+# #             response["raw_output"] = raw_output
			
 
				+# #         return response
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+# import requests
			
 
				+# import json
			
 
				+# from typing import Dict, List, Optional
			
 
				+# from django.conf import settings
			
 
				+# from concurrent.futures import ThreadPoolExecutor, as_completed
			
 
				+
			
 
				+
			
 
				+# class ProductAttributeService:
			
 
				+#     """Service class for extracting product attributes using Groq LLM."""
			
 
				+
			
 
				+#     @staticmethod
			
 
				+#     def combine_product_text(
			
 
				+#         title: Optional[str] = None,
			
 
				+#         short_desc: Optional[str] = None,
			
 
				+#         long_desc: Optional[str] = None
			
 
				+#     ) -> str:
			
 
				+#         """Combine product metadata into a single text block."""
			
 
				+#         parts = []
			
 
				+#         if title:
			
 
				+#             parts.append(str(title).strip())
			
 
				+#         if short_desc:
			
 
				+#             parts.append(str(short_desc).strip())
			
 
				+#         if long_desc:
			
 
				+#             parts.append(str(long_desc).strip())
			
 
				+
			
 
				+#         combined = " ".join(parts).strip()
			
 
				+
			
 
				+#         if not combined:
			
 
				+#             return "No product information available"
			
 
				+
			
 
				+#         return combined
			
 
				+
			
 
				+#     @staticmethod
			
 
				+#     def extract_attributes(
			
 
				+#         product_text: str,
			
 
				+#         mandatory_attrs: Dict[str, List[str]],
			
 
				+#         model: str = None,
			
 
				+#         extract_additional: bool = True
			
 
				+#     ) -> dict:
			
 
				+#         """Use Groq LLM to extract attributes from any product type."""
			
 
				+        
			
 
				+#         if model is None:
			
 
				+#             model = settings.SUPPORTED_MODELS[0]
			
 
				+
			
 
				+#         # Check if product text is empty or minimal
			
 
				+#         if not product_text or product_text == "No product information available":
			
 
				+#             return ProductAttributeService._create_error_response(
			
 
				+#                 "No product information provided",
			
 
				+#                 mandatory_attrs,
			
 
				+#                 extract_additional
			
 
				+#             )
			
 
				+
			
 
				+#         # Create structured prompt for mandatory attributes
			
 
				+#         mandatory_attr_list = []
			
 
				+#         for attr_name, allowed_values in mandatory_attrs.items():
			
 
				+#             mandatory_attr_list.append(f"{attr_name}: {', '.join(allowed_values)}")
			
 
				+#         mandatory_attr_text = "\n".join(mandatory_attr_list)
			
 
				+
			
 
				+#         additional_instruction = ""
			
 
				+#         if extract_additional:
			
 
				+#             additional_instruction = """
			
 
				+# 2. Extract ADDITIONAL attributes: Identify any other relevant attributes from the product text 
			
 
				+#    (such as Material, Size, Color, Brand, Dimensions, Weight, Features, Specifications, etc.) 
			
 
				+#    and their values. Extract attributes that are specific and relevant to this product type."""
			
 
				+
			
 
				+#         output_format = {
			
 
				+#             "mandatory": {attr: "value" for attr in mandatory_attrs.keys()},
			
 
				+#             "additional": {} if extract_additional else None
			
 
				+#         }
			
 
				+
			
 
				+#         if not extract_additional:
			
 
				+#             output_format.pop("additional")
			
 
				+
			
 
				+#         prompt = f"""
			
 
				+# You are an intelligent product attribute extractor that works with ANY product type.
			
 
				+
			
 
				+# TASK:
			
 
				+# 1. Extract MANDATORY attributes: For each mandatory attribute, select the most appropriate value 
			
 
				+#    from the provided list. Choose the value that best matches the product description.
			
 
				+# {additional_instruction}
			
 
				+
			
 
				+# Product Text:
			
 
				+# {product_text}
			
 
				+
			
 
				+# Mandatory Attribute Lists (MUST select one value for each):
			
 
				+# {mandatory_attr_text}
			
 
				+
			
 
				+# CRITICAL INSTRUCTIONS:
			
 
				+# - Return ONLY valid JSON, nothing else
			
 
				+# - No explanations, no markdown, no text before or after the JSON
			
 
				+# - For mandatory attributes, choose EXACTLY ONE value from the provided list that best matches
			
 
				+# - If a mandatory attribute cannot be determined from the product text, use "Not Specified"
			
 
				+# - Work with whatever information is available - the product text may be incomplete (only title, or only description, etc.)
			
 
				+# {f"- For additional attributes, extract any relevant information found in the product text" if extract_additional else ""}
			
 
				+# - Be precise and only extract information that is explicitly stated or clearly implied
			
 
				+
			
 
				+# Required Output Format (ONLY THIS, NO OTHER TEXT):
			
 
				+# {json.dumps(output_format, indent=2)}
			
 
				+#         """
			
 
				+
			
 
				+#         payload = {
			
 
				+#             "model": model,
			
 
				+#             "messages": [
			
 
				+#                 {
			
 
				+#                     "role": "system",
			
 
				+#                     "content": f"You are a precise attribute extraction model. Return ONLY valid JSON with {'mandatory and additional' if extract_additional else 'mandatory'} sections. No explanations, no markdown, no other text."
			
 
				+#                 },
			
 
				+#                 {"role": "user", "content": prompt}
			
 
				+#             ],
			
 
				+#             "temperature": 0.0,
			
 
				+#             "max_tokens": 1500
			
 
				+#         }
			
 
				+
			
 
				+#         headers = {
			
 
				+#             "Authorization": f"Bearer {settings.GROQ_API_KEY}",
			
 
				+#             "Content-Type": "application/json",
			
 
				+#         }
			
 
				+
			
 
				+#         try:
			
 
				+#             response = requests.post(
			
 
				+#                 settings.GROQ_API_URL,
			
 
				+#                 headers=headers,
			
 
				+#                 json=payload,
			
 
				+#                 timeout=30
			
 
				+#             )
			
 
				+#             response.raise_for_status()
			
 
				+#             result_text = response.json()["choices"][0]["message"]["content"].strip()
			
 
				+
			
 
				+#             # Clean the response
			
 
				+#             result_text = ProductAttributeService._clean_json_response(result_text)
			
 
				+
			
 
				+#             # Parse JSON
			
 
				+#             parsed = json.loads(result_text)
			
 
				+
			
 
				+#             # Validate and restructure if needed
			
 
				+#             parsed = ProductAttributeService._validate_response_structure(
			
 
				+#                 parsed, mandatory_attrs, extract_additional
			
 
				+#             )
			
 
				+
			
 
				+#             return parsed
			
 
				+
			
 
				+#         except requests.exceptions.RequestException as e:
			
 
				+#             return ProductAttributeService._create_error_response(
			
 
				+#                 str(e), mandatory_attrs, extract_additional
			
 
				+#             )
			
 
				+#         except json.JSONDecodeError as e:
			
 
				+#             return ProductAttributeService._create_error_response(
			
 
				+#                 f"Invalid JSON: {str(e)}", mandatory_attrs, extract_additional, result_text
			
 
				+#             )
			
 
				+#         except Exception as e:
			
 
				+#             return ProductAttributeService._create_error_response(
			
 
				+#                 str(e), mandatory_attrs, extract_additional
			
 
				+#             )
			
 
				+
			
 
				+#     @staticmethod
			
 
				+#     def extract_attributes_batch(
			
 
				+#         products: List[Dict],
			
 
				+#         mandatory_attrs: Dict[str, List[str]],
			
 
				+#         model: str = None,
			
 
				+#         extract_additional: bool = True,
			
 
				+#         max_workers: int = 5
			
 
				+#     ) -> Dict:
			
 
				+#         """
			
 
				+#         Extract attributes for multiple products in parallel.
			
 
				+        
			
 
				+#         Args:
			
 
				+#             products: List of product dictionaries with keys: product_id, title, short_desc, long_desc
			
 
				+#             mandatory_attrs: Dictionary of mandatory attributes
			
 
				+#             model: Groq model to use
			
 
				+#             extract_additional: Whether to extract additional attributes
			
 
				+#             max_workers: Maximum number of parallel workers
			
 
				+            
			
 
				+#         Returns:
			
 
				+#             Dictionary with results, total_products, successful, and failed counts
			
 
				+#         """
			
 
				+#         results = []
			
 
				+#         successful = 0
			
 
				+#         failed = 0
			
 
				+
			
 
				+#         def process_product(product_data):
			
 
				+#             """Process a single product."""
			
 
				+#             product_id = product_data.get('product_id', f"product_{len(results)}")
			
 
				+            
			
 
				+#             try:
			
 
				+#                 product_text = ProductAttributeService.combine_product_text(
			
 
				+#                     title=product_data.get('title'),
			
 
				+#                     short_desc=product_data.get('short_desc'),
			
 
				+#                     long_desc=product_data.get('long_desc')
			
 
				+#                 )
			
 
				+                
			
 
				+#                 result = ProductAttributeService.extract_attributes(
			
 
				+#                     product_text=product_text,
			
 
				+#                     mandatory_attrs=mandatory_attrs,
			
 
				+#                     model=model,
			
 
				+#                     extract_additional=extract_additional
			
 
				+#                 )
			
 
				+                
			
 
				+#                 result['product_id'] = product_id
			
 
				+                
			
 
				+#                 # Check if extraction was successful
			
 
				+#                 if 'error' not in result:
			
 
				+#                     return result, True
			
 
				+#                 else:
			
 
				+#                     return result, False
			
 
				+                    
			
 
				+#             except Exception as e:
			
 
				+#                 return {
			
 
				+#                     'product_id': product_id,
			
 
				+#                     'mandatory': {attr: "Not Specified" for attr in mandatory_attrs.keys()},
			
 
				+#                     'additional': {} if extract_additional else None,
			
 
				+#                     'error': f"Processing error: {str(e)}"
			
 
				+#                 }, False
			
 
				+
			
 
				+#         # Process products in parallel
			
 
				+#         with ThreadPoolExecutor(max_workers=max_workers) as executor:
			
 
				+#             future_to_product = {
			
 
				+#                 executor.submit(process_product, product): product 
			
 
				+#                 for product in products
			
 
				+#             }
			
 
				+            
			
 
				+#             for future in as_completed(future_to_product):
			
 
				+#                 try:
			
 
				+#                     result, success = future.result()
			
 
				+#                     results.append(result)
			
 
				+#                     if success:
			
 
				+#                         successful += 1
			
 
				+#                     else:
			
 
				+#                         failed += 1
			
 
				+#                 except Exception as e:
			
 
				+#                     failed += 1
			
 
				+#                     results.append({
			
 
				+#                         'product_id': 'unknown',
			
 
				+#                         'mandatory': {attr: "Not Specified" for attr in mandatory_attrs.keys()},
			
 
				+#                         'additional': {} if extract_additional else None,
			
 
				+#                         'error': f"Unexpected error: {str(e)}"
			
 
				+#                     })
			
 
				+
			
 
				+#         return {
			
 
				+#             'results': results,
			
 
				+#             'total_products': len(products),
			
 
				+#             'successful': successful,
			
 
				+#             'failed': failed
			
 
				+#         }
			
 
				+
			
 
				+#     @staticmethod
			
 
				+#     def _clean_json_response(text: str) -> str:
			
 
				+#         """Clean LLM response to extract valid JSON."""
			
 
				+#         start_idx = text.find('{')
			
 
				+#         end_idx = text.rfind('}')
			
 
				+
			
 
				+#         if start_idx != -1 and end_idx != -1:
			
 
				+#             text = text[start_idx:end_idx + 1]
			
 
				+
			
 
				+#         if "```json" in text:
			
 
				+#             text = text.split("```json")[1].split("```")[0].strip()
			
 
				+#         elif "```" in text:
			
 
				+#             text = text.split("```")[1].split("```")[0].strip()
			
 
				+#             if text.startswith("json"):
			
 
				+#                 text = text[4:].strip()
			
 
				+
			
 
				+#         return text
			
 
				+
			
 
				+#     @staticmethod
			
 
				+#     def _validate_response_structure(
			
 
				+#         parsed: dict,
			
 
				+#         mandatory_attrs: Dict[str, List[str]],
			
 
				+#         extract_additional: bool
			
 
				+#     ) -> dict:
			
 
				+#         """Validate and fix the response structure."""
			
 
				+#         expected_sections = ["mandatory"]
			
 
				+#         if extract_additional:
			
 
				+#             expected_sections.append("additional")
			
 
				+
			
 
				+#         if not all(section in parsed for section in expected_sections):
			
 
				+#             if isinstance(parsed, dict):
			
 
				+#                 mandatory_keys = set(mandatory_attrs.keys())
			
 
				+#                 mandatory = {k: v for k, v in parsed.items() if k in mandatory_keys}
			
 
				+#                 additional = {k: v for k, v in parsed.items() if k not in mandatory_keys}
			
 
				+
			
 
				+#                 result = {"mandatory": mandatory}
			
 
				+#                 if extract_additional:
			
 
				+#                     result["additional"] = additional
			
 
				+#                 return result
			
 
				+#             else:
			
 
				+#                 return ProductAttributeService._create_error_response(
			
 
				+#                     "Invalid response structure",
			
 
				+#                     mandatory_attrs,
			
 
				+#                     extract_additional,
			
 
				+#                     str(parsed)
			
 
				+#                 )
			
 
				+
			
 
				+#         return parsed
			
 
				+
			
 
				+#     @staticmethod
			
 
				+#     def _create_error_response(
			
 
				+#         error: str,
			
 
				+#         mandatory_attrs: Dict[str, List[str]],
			
 
				+#         extract_additional: bool,
			
 
				+#         raw_output: Optional[str] = None
			
 
				+#     ) -> dict:
			
 
				+#         """Create a standardized error response."""
			
 
				+#         response = {
			
 
				+#             "mandatory": {attr: "Not Specified" for attr in mandatory_attrs.keys()},
			
 
				+#             "error": error
			
 
				+#         }
			
 
				+#         if extract_additional:
			
 
				+#             response["additional"] = {}
			
 
				+#         if raw_output:
			
 
				+#             response["raw_output"] = raw_output
			
 
				+#         return response
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+# ==================== services.py ====================
			
 
				+import requests
			
 
				+import json
			
 
				+from typing import Dict, List, Optional
			
 
				+from django.conf import settings
			
 
				+from concurrent.futures import ThreadPoolExecutor, as_completed
			
 
				+from .ocr_service import OCRService
			
 
				+
			
 
				+
			
 
				+class ProductAttributeService:
			
 
				+    """Service class for extracting product attributes using Groq LLM."""
			
 
				+
			
 
				+    @staticmethod
			
 
				+    def combine_product_text(
			
 
				+        title: Optional[str] = None,
			
 
				+        short_desc: Optional[str] = None,
			
 
				+        long_desc: Optional[str] = None,
			
 
				+        ocr_text: Optional[str] = None
			
 
				+    ) -> str:
			
 
				+        """Combine product metadata into a single text block."""
			
 
				+        parts = []
			
 
				+        if title:
			
 
				+            parts.append(f"Title: {str(title).strip()}")
			
 
				+        if short_desc:
			
 
				+            parts.append(f"Description: {str(short_desc).strip()}")
			
 
				+        if long_desc:
			
 
				+            parts.append(f"Details: {str(long_desc).strip()}")
			
 
				+        if ocr_text:
			
 
				+            parts.append(f"OCR Text: {ocr_text}")
			
 
				+        
			
 
				+        combined = "\n".join(parts).strip()
			
 
				+        
			
 
				+        if not combined:
			
 
				+            return "No product information available"
			
 
				+        
			
 
				+        return combined
			
 
				+
			
 
				+    @staticmethod
			
 
				+    def extract_attributes_from_ocr(ocr_results: Dict, model: str = None) -> Dict:
			
 
				+        """Extract structured attributes from OCR text using LLM."""
			
 
				+        if model is None:
			
 
				+            model = settings.SUPPORTED_MODELS[0]
			
 
				+        
			
 
				+        detected_text = ocr_results.get('detected_text', [])
			
 
				+        if not detected_text:
			
 
				+            return {}
			
 
				+        
			
 
				+        # Format OCR text for prompt
			
 
				+        ocr_text = "\n".join([f"Text: {item['text']}, Confidence: {item['confidence']:.2f}" 
			
 
				+                              for item in detected_text])
			
 
				+        
			
 
				+        prompt = f"""
			
 
				+You are an AI model that extracts structured attributes from OCR text detected on product images.
			
 
				+Given the OCR detections below, infer the possible product attributes and return them as a clean JSON object.
			
 
				+
			
 
				+OCR Text:
			
 
				+{ocr_text}
			
 
				+
			
 
				+Extract relevant attributes like:
			
 
				+- brand
			
 
				+- model_number
			
 
				+- size (waist_size, length, etc.)
			
 
				+- collection
			
 
				+- any other relevant product information
			
 
				+
			
 
				+Return a JSON object with only the attributes you can confidently identify.
			
 
				+If an attribute is not present, do not include it in the response.
			
 
				+"""
			
 
				+        
			
 
				+        payload = {
			
 
				+            "model": model,
			
 
				+            "messages": [
			
 
				+                {
			
 
				+                    "role": "system",
			
 
				+                    "content": "You are a helpful AI that extracts structured data from OCR output. Return only valid JSON."
			
 
				+                },
			
 
				+                {"role": "user", "content": prompt}
			
 
				+            ],
			
 
				+            "temperature": 0.2,
			
 
				+            "max_tokens": 500
			
 
				+        }
			
 
				+        
			
 
				+        headers = {
			
 
				+            "Authorization": f"Bearer {settings.GROQ_API_KEY}",
			
 
				+            "Content-Type": "application/json",
			
 
				+        }
			
 
				+        
			
 
				+        try:
			
 
				+            response = requests.post(
			
 
				+                settings.GROQ_API_URL,
			
 
				+                headers=headers,
			
 
				+                json=payload,
			
 
				+                timeout=30
			
 
				+            )
			
 
				+            response.raise_for_status()
			
 
				+            result_text = response.json()["choices"][0]["message"]["content"].strip()
			
 
				+            
			
 
				+            # Clean and parse JSON
			
 
				+            result_text = ProductAttributeService._clean_json_response(result_text)
			
 
				+            parsed = json.loads(result_text)
			
 
				+            
			
 
				+            return parsed
			
 
				+        except Exception as e:
			
 
				+            return {"error": f"Failed to extract attributes from OCR: {str(e)}"}
			
 
				+
			
 
				+    @staticmethod
			
 
				+    def extract_attributes(
			
 
				+        product_text: str,
			
 
				+        mandatory_attrs: Dict[str, List[str]],
			
 
				+        model: str = None,
			
 
				+        extract_additional: bool = True
			
 
				+    ) -> dict:
			
 
				+        """Use Groq LLM to extract attributes from any product type."""
			
 
				+        
			
 
				+        if model is None:
			
 
				+            model = settings.SUPPORTED_MODELS[0]
			
 
				+
			
 
				+        # Check if product text is empty or minimal
			
 
				+        if not product_text or product_text == "No product information available":
			
 
				+            return ProductAttributeService._create_error_response(
			
 
				+                "No product information provided",
			
 
				+                mandatory_attrs,
			
 
				+                extract_additional
			
 
				+            )
			
 
				+
			
 
				+        # Create structured prompt for mandatory attributes
			
 
				+        mandatory_attr_list = []
			
 
				+        for attr_name, allowed_values in mandatory_attrs.items():
			
 
				+            mandatory_attr_list.append(f"{attr_name}: {', '.join(allowed_values)}")
			
 
				+        mandatory_attr_text = "\n".join(mandatory_attr_list)
			
 
				+
			
 
				+        additional_instruction = ""
			
 
				+        if extract_additional:
			
 
				+            additional_instruction = """
			
 
				+2. Extract ADDITIONAL attributes: Identify any other relevant attributes from the product text 
			
 
				+   (such as Material, Size, Color, Brand, Dimensions, Weight, Features, Specifications, etc.) 
			
 
				+   and their values. Extract attributes that are specific and relevant to this product type."""
			
 
				+
			
 
				+        output_format = {
			
 
				+            "mandatory": {attr: "value" for attr in mandatory_attrs.keys()},
			
 
				+            "additional": {} if extract_additional else None
			
 
				+        }
			
 
				+
			
 
				+        if not extract_additional:
			
 
				+            output_format.pop("additional")
			
 
				+
			
 
				+        prompt = f"""
			
 
				+You are an intelligent product attribute extractor that works with ANY product type.
			
 
				+
			
 
				+TASK:
			
 
				+1. Extract MANDATORY attributes: For each mandatory attribute, select the most appropriate value 
			
 
				+   from the provided list. Choose the value that best matches the product description.
			
 
				+{additional_instruction}
			
 
				+
			
 
				+Product Text:
			
 
				+{product_text}
			
 
				+
			
 
				+Mandatory Attribute Lists (MUST select one value for each):
			
 
				+{mandatory_attr_text}
			
 
				+
			
 
				+CRITICAL INSTRUCTIONS:
			
 
				+- Return ONLY valid JSON, nothing else
			
 
				+- No explanations, no markdown, no text before or after the JSON
			
 
				+- For mandatory attributes, choose EXACTLY ONE value from the provided list that best matches
			
 
				+- If a mandatory attribute cannot be determined from the product text, use "Not Specified"
			
 
				+- Work with whatever information is available - the product text may be incomplete
			
 
				+{f"- For additional attributes, extract any relevant information found in the product text" if extract_additional else ""}
			
 
				+- Be precise and only extract information that is explicitly stated or clearly implied
			
 
				+
			
 
				+Required Output Format (ONLY THIS, NO OTHER TEXT):
			
 
				+{json.dumps(output_format, indent=2)}
			
 
				+        """
			
 
				+
			
 
				+        payload = {
			
 
				+            "model": model,
			
 
				+            "messages": [
			
 
				+                {
			
 
				+                    "role": "system",
			
 
				+                    "content": f"You are a precise attribute extraction model. Return ONLY valid JSON with {'mandatory and additional' if extract_additional else 'mandatory'} sections. No explanations, no markdown, no other text."
			
 
				+                },
			
 
				+                {"role": "user", "content": prompt}
			
 
				+            ],
			
 
				+            "temperature": 0.0,
			
 
				+            "max_tokens": 1500
			
 
				+        }
			
 
				+
			
 
				+        headers = {
			
 
				+            "Authorization": f"Bearer {settings.GROQ_API_KEY}",
			
 
				+            "Content-Type": "application/json",
			
 
				+        }
			
 
				+
			
 
				+        try:
			
 
				+            response = requests.post(
			
 
				+                settings.GROQ_API_URL,
			
 
				+                headers=headers,
			
 
				+                json=payload,
			
 
				+                timeout=30
			
 
				+            )
			
 
				+            response.raise_for_status()
			
 
				+            result_text = response.json()["choices"][0]["message"]["content"].strip()
			
 
				+
			
 
				+            # Clean the response
			
 
				+            result_text = ProductAttributeService._clean_json_response(result_text)
			
 
				+
			
 
				+            # Parse JSON
			
 
				+            parsed = json.loads(result_text)
			
 
				+
			
 
				+            # Validate and restructure if needed
			
 
				+            parsed = ProductAttributeService._validate_response_structure(
			
 
				+                parsed, mandatory_attrs, extract_additional
			
 
				+            )
			
 
				+
			
 
				+            return parsed
			
 
				+
			
 
				+        except requests.exceptions.RequestException as e:
			
 
				+            return ProductAttributeService._create_error_response(
			
 
				+                str(e), mandatory_attrs, extract_additional
			
 
				+            )
			
 
				+        except json.JSONDecodeError as e:
			
 
				+            return ProductAttributeService._create_error_response(
			
 
				+                f"Invalid JSON: {str(e)}", mandatory_attrs, extract_additional, result_text
			
 
				+            )
			
 
				+        except Exception as e:
			
 
				+            return ProductAttributeService._create_error_response(
			
 
				+                str(e), mandatory_attrs, extract_additional
			
 
				+            )
			
 
				+
			
 
				+    @staticmethod
			
 
				+    def extract_attributes_batch(
			
 
				+        products: List[Dict],
			
 
				+        mandatory_attrs: Dict[str, List[str]],
			
 
				+        model: str = None,
			
 
				+        extract_additional: bool = True,
			
 
				+        process_image: bool = True,
			
 
				+        max_workers: int = 5
			
 
				+    ) -> Dict:
			
 
				+        """Extract attributes for multiple products in parallel."""
			
 
				+        results = []
			
 
				+        successful = 0
			
 
				+        failed = 0
			
 
				+        
			
 
				+        ocr_service = OCRService()
			
 
				+
			
 
				+        def process_product(product_data):
			
 
				+            """Process a single product."""
			
 
				+            product_id = product_data.get('product_id', f"product_{len(results)}")
			
 
				+            
			
 
				+            try:
			
 
				+                # Process image if URL is provided
			
 
				+                ocr_results = None
			
 
				+                ocr_text = None
			
 
				+                
			
 
				+                if process_image and product_data.get('image_url'):
			
 
				+                    ocr_results = ocr_service.process_image(product_data['image_url'])
			
 
				+                    
			
 
				+                    # Extract attributes from OCR
			
 
				+                    if ocr_results and ocr_results.get('detected_text'):
			
 
				+                        ocr_attrs = ProductAttributeService.extract_attributes_from_ocr(
			
 
				+                            ocr_results, model
			
 
				+                        )
			
 
				+                        ocr_results['extracted_attributes'] = ocr_attrs
			
 
				+                        
			
 
				+                        # Format OCR text for combining with product text
			
 
				+                        ocr_text = "\n".join([
			
 
				+                            f"{item['text']} (confidence: {item['confidence']:.2f})"
			
 
				+                            for item in ocr_results['detected_text']
			
 
				+                        ])
			
 
				+                
			
 
				+                # Combine all product information
			
 
				+                product_text = ProductAttributeService.combine_product_text(
			
 
				+                    title=product_data.get('title'),
			
 
				+                    short_desc=product_data.get('short_desc'),
			
 
				+                    long_desc=product_data.get('long_desc'),
			
 
				+                    ocr_text=ocr_text
			
 
				+                )
			
 
				+                
			
 
				+                # Extract attributes from combined text
			
 
				+                result = ProductAttributeService.extract_attributes(
			
 
				+                    product_text=product_text,
			
 
				+                    mandatory_attrs=mandatory_attrs,
			
 
				+                    model=model,
			
 
				+                    extract_additional=extract_additional
			
 
				+                )
			
 
				+                
			
 
				+                result['product_id'] = product_id
			
 
				+                
			
 
				+                # Add OCR results if available
			
 
				+                if ocr_results:
			
 
				+                    result['ocr_results'] = ocr_results
			
 
				+                
			
 
				+                # Check if extraction was successful
			
 
				+                if 'error' not in result:
			
 
				+                    return result, True
			
 
				+                else:
			
 
				+                    return result, False
			
 
				+                    
			
 
				+            except Exception as e:
			
 
				+                return {
			
 
				+                    'product_id': product_id,
			
 
				+                    'mandatory': {attr: "Not Specified" for attr in mandatory_attrs.keys()},
			
 
				+                    'additional': {} if extract_additional else None,
			
 
				+                    'error': f"Processing error: {str(e)}"
			
 
				+                }, False
			
 
				+
			
 
				+        # Process products in parallel
			
 
				+        with ThreadPoolExecutor(max_workers=max_workers) as executor:
			
 
				+            future_to_product = {
			
 
				+                executor.submit(process_product, product): product 
			
 
				+                for product in products
			
 
				+            }
			
 
				+            
			
 
				+            for future in as_completed(future_to_product):
			
 
				+                try:
			
 
				+                    result, success = future.result()
			
 
				+                    results.append(result)
			
 
				+                    if success:
			
 
				+                        successful += 1
			
 
				+                    else:
			
 
				+                        failed += 1
			
 
				+                except Exception as e:
			
 
				+                    failed += 1
			
 
				+                    results.append({
			
 
				+                        'product_id': 'unknown',
			
 
				+                        'mandatory': {attr: "Not Specified" for attr in mandatory_attrs.keys()},
			
 
				+                        'additional': {} if extract_additional else None,
			
 
				+                        'error': f"Unexpected error: {str(e)}"
			
 
				+                    })
			
 
				+
			
 
				+        return {
			
 
				+            'results': results,
			
 
				+            'total_products': len(products),
			
 
				+            'successful': successful,
			
 
				+            'failed': failed
			
 
				+        }
			
 
				+
			
 
				+    @staticmethod
			
 
				+    def _clean_json_response(text: str) -> str:
			
 
				+        """Clean LLM response to extract valid JSON."""
			
 
				+        start_idx = text.find('{')
			
 
				+        end_idx = text.rfind('}')
			
 
				+
			
 
				+        if start_idx != -1 and end_idx != -1:
			
 
				+            text = text[start_idx:end_idx + 1]
			
 
				+
			
 
				+        if "```json" in text:
			
 
				+            text = text.split("```json")[1].split("```")[0].strip()
			
 
				+        elif "```" in text:
			
 
				+            text = text.split("```")[1].split("```")[0].strip()
			
 
				+            if text.startswith("json"):
			
 
				+                text = text[4:].strip()
			
 
				+
			
 
				+        return text
			
 
				+
			
 
				+    @staticmethod
			
 
				+    def _validate_response_structure(
			
 
				+        parsed: dict,
			
 
				+        mandatory_attrs: Dict[str, List[str]],
			
 
				+        extract_additional: bool
			
 
				+    ) -> dict:
			
 
				+        """Validate and fix the response structure."""
			
 
				+        expected_sections = ["mandatory"]
			
 
				+        if extract_additional:
			
 
				+            expected_sections.append("additional")
			
 
				+
			
 
				+        if not all(section in parsed for section in expected_sections):
			
 
				+            if isinstance(parsed, dict):
			
 
				+                mandatory_keys = set(mandatory_attrs.keys())
			
 
				+                mandatory = {k: v for k, v in parsed.items() if k in mandatory_keys}
			
 
				+                additional = {k: v for k, v in parsed.items() if k not in mandatory_keys}
			
 
				+
			
 
				+                result = {"mandatory": mandatory}
			
 
				+                if extract_additional:
			
 
				+                    result["additional"] = additional
			
 
				+                return result
			
 
				+            else:
			
 
				+                return ProductAttributeService._create_error_response(
			
 
				+                    "Invalid response structure",
			
 
				+                    mandatory_attrs,
			
 
				+                    extract_additional,
			
 
				+                    str(parsed)
			
 
				+                )
			
 
				+
			
 
				+        return parsed
			
 
				+
			
 
				+    @staticmethod
			
 
				+    def _create_error_response(
			
 
				+        error: str,
			
 
				+        mandatory_attrs: Dict[str, List[str]],
			
 
				+        extract_additional: bool,
			
 
				+        raw_output: Optional[str] = None
			
 
				+    ) -> dict:
			
 
				+        """Create a standardized error response."""
			
 
				+        response = {
			
 
				+            "mandatory": {attr: "Not Specified" for attr in mandatory_attrs.keys()},
			
 
				+            "error": error
			
 
				+        }
			
 
				+        if extract_additional:
			
 
				+            response["additional"] = {}
			
 
				+        if raw_output:
			
 
				+            response["raw_output"] = raw_output
			
 
				+        return response
			
 
				+
			
--- a/attr_extraction/tests.py
+++ b/attr_extraction/tests.py
@@ -0,0 +1,3 @@
 
				+from django.test import TestCase
			
 
				+
			
 
				+# Create your tests here.
			
--- a/attr_extraction/urls.py
+++ b/attr_extraction/urls.py
@@ -0,0 +1,9 @@
 
				+# ==================== urls.py ====================
			
 
				+from django.urls import path
			
 
				+from .views import ExtractProductAttributesView, BatchExtractProductAttributesView, ProductListView
			
 
				+
			
 
				+urlpatterns = [
			
 
				+    path('extract/', ExtractProductAttributesView.as_view(), name='extract-attributes'),
			
 
				+    path('batch-extract/', BatchExtractProductAttributesView.as_view(), name='batch-extract-attributes'),
			
 
				+    path('products/', ProductListView.as_view(), name='batch-extract-attributes'),
			
 
				+]
			
--- a/attr_extraction/views.py
+++ b/attr_extraction/views.py
@@ -0,0 +1,352 @@
 
				+# #  #==================== views.py ====================
			
 
				+# # from rest_framework.views import APIView
			
 
				+# # from rest_framework.response import Response
			
 
				+# # from rest_framework import status
			
 
				+# # from .serializers import (
			
 
				+# #     ProductAttributeRequestSerializer,
			
 
				+# #     ProductAttributeResponseSerializer
			
 
				+# # )
			
 
				+# # from .services import ProductAttributeService
			
 
				+
			
 
				+
			
 
				+# # class ExtractProductAttributesView(APIView):
			
 
				+# #     """
			
 
				+# #     API endpoint to extract product attributes using Groq LLM.
			
 
				+    
			
 
				+# #     POST /api/extract-attributes/
			
 
				+    
			
 
				+# #     Request Body:
			
 
				+# #     {
			
 
				+# #         "title": "Product title (optional)",
			
 
				+# #         "short_desc": "Short description (optional)",
			
 
				+# #         "long_desc": "Long description (optional)",
			
 
				+# #         "mandatory_attrs": {
			
 
				+# #             "Attribute1": ["value1", "value2", "value3"],
			
 
				+# #             "Attribute2": ["valueA", "valueB"]
			
 
				+# #         },
			
 
				+# #         "model": "llama-3.1-8b-instant (optional)",
			
 
				+# #         "extract_additional": true (optional, default: true)
			
 
				+# #     }
			
 
				+    
			
 
				+# #     Response:
			
 
				+# #     {
			
 
				+# #         "mandatory": {
			
 
				+# #             "Attribute1": "value1",
			
 
				+# #             "Attribute2": "valueA"
			
 
				+# #         },
			
 
				+# #         "additional": {
			
 
				+# #             "Color": "Blue",
			
 
				+# #             "Brand": "Example"
			
 
				+# #         }
			
 
				+# #     }
			
 
				+# #     """
			
 
				+
			
 
				+# #     def post(self, request):
			
 
				+# #         # Validate request data
			
 
				+# #         serializer = ProductAttributeRequestSerializer(data=request.data)
			
 
				+# #         if not serializer.is_valid():
			
 
				+# #             return Response(
			
 
				+# #                 {"error": serializer.errors},
			
 
				+# #                 status=status.HTTP_400_BAD_REQUEST
			
 
				+# #             )
			
 
				+
			
 
				+# #         validated_data = serializer.validated_data
			
 
				+
			
 
				+# #         # Combine product text
			
 
				+# #         product_text = ProductAttributeService.combine_product_text(
			
 
				+# #             title=validated_data.get('title'),
			
 
				+# #             short_desc=validated_data.get('short_desc'),
			
 
				+# #             long_desc=validated_data.get('long_desc')
			
 
				+# #         )
			
 
				+
			
 
				+# #         # Extract attributes
			
 
				+# #         result = ProductAttributeService.extract_attributes(
			
 
				+# #             product_text=product_text,
			
 
				+# #             mandatory_attrs=validated_data['mandatory_attrs'],
			
 
				+# #             model=validated_data.get('model'),
			
 
				+# #             extract_additional=validated_data.get('extract_additional', True)
			
 
				+# #         )
			
 
				+
			
 
				+# #         # Return response
			
 
				+# #         response_serializer = ProductAttributeResponseSerializer(data=result)
			
 
				+# #         if response_serializer.is_valid():
			
 
				+# #             return Response(response_serializer.data, status=status.HTTP_200_OK)
			
 
				+        
			
 
				+# #         return Response(result, status=status.HTTP_200_OK)
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+# from rest_framework.views import APIView
			
 
				+# from rest_framework.response import Response
			
 
				+# from rest_framework import status
			
 
				+# from .serializers import (
			
 
				+#     SingleProductRequestSerializer,
			
 
				+#     BatchProductRequestSerializer,
			
 
				+#     ProductAttributeResultSerializer,
			
 
				+#     BatchProductResponseSerializer
			
 
				+# )
			
 
				+# from .services import ProductAttributeService
			
 
				+
			
 
				+
			
 
				+# class ExtractProductAttributesView(APIView):
			
 
				+#     """
			
 
				+#     API endpoint to extract product attributes for a single product.
			
 
				+    
			
 
				+#     POST /api/extract-attributes/
			
 
				+    
			
 
				+#     Request Body:
			
 
				+#     {
			
 
				+#         "title": "Product title (optional)",
			
 
				+#         "short_desc": "Short description (optional)",
			
 
				+#         "long_desc": "Long description (optional)",
			
 
				+#         "mandatory_attrs": {
			
 
				+#             "Attribute1": ["value1", "value2", "value3"],
			
 
				+#             "Attribute2": ["valueA", "valueB"]
			
 
				+#         },
			
 
				+#         "model": "llama-3.1-8b-instant (optional)",
			
 
				+#         "extract_additional": true (optional, default: true)
			
 
				+#     }
			
 
				+#     """
			
 
				+
			
 
				+#     def post(self, request):
			
 
				+#         serializer = SingleProductRequestSerializer(data=request.data)
			
 
				+#         if not serializer.is_valid():
			
 
				+#             return Response(
			
 
				+#                 {"error": serializer.errors},
			
 
				+#                 status=status.HTTP_400_BAD_REQUEST
			
 
				+#             )
			
 
				+
			
 
				+#         validated_data = serializer.validated_data
			
 
				+
			
 
				+#         product_text = ProductAttributeService.combine_product_text(
			
 
				+#             title=validated_data.get('title'),
			
 
				+#             short_desc=validated_data.get('short_desc'),
			
 
				+#             long_desc=validated_data.get('long_desc')
			
 
				+#         )
			
 
				+
			
 
				+#         result = ProductAttributeService.extract_attributes(
			
 
				+#             product_text=product_text,
			
 
				+#             mandatory_attrs=validated_data['mandatory_attrs'],
			
 
				+#             model=validated_data.get('model'),
			
 
				+#             extract_additional=validated_data.get('extract_additional', True)
			
 
				+#         )
			
 
				+
			
 
				+#         response_serializer = ProductAttributeResultSerializer(data=result)
			
 
				+#         if response_serializer.is_valid():
			
 
				+#             return Response(response_serializer.data, status=status.HTTP_200_OK)
			
 
				+        
			
 
				+#         return Response(result, status=status.HTTP_200_OK)
			
 
				+
			
 
				+
			
 
				+# class BatchExtractProductAttributesView(APIView):
			
 
				+#     """
			
 
				+#     API endpoint to extract product attributes for multiple products in batch.
			
 
				+    
			
 
				+#     POST /api/batch-extract-attributes/
			
 
				+    
			
 
				+#     Request Body:
			
 
				+#     {
			
 
				+#         "products": [
			
 
				+#             {
			
 
				+#                 "product_id": "prod_001",
			
 
				+#                 "title": "Product 1 title",
			
 
				+#                 "short_desc": "Short description",
			
 
				+#                 "long_desc": "Long description"
			
 
				+#             },
			
 
				+#             {
			
 
				+#                 "product_id": "prod_002",
			
 
				+#                 "title": "Product 2 title",
			
 
				+#                 "short_desc": "Short description"
			
 
				+#             }
			
 
				+#         ],
			
 
				+#         "mandatory_attrs": {
			
 
				+#             "Attribute1": ["value1", "value2", "value3"],
			
 
				+#             "Attribute2": ["valueA", "valueB"]
			
 
				+#         },
			
 
				+#         "model": "llama-3.1-8b-instant (optional)",
			
 
				+#         "extract_additional": true (optional, default: true)
			
 
				+#     }
			
 
				+    
			
 
				+#     Response:
			
 
				+#     {
			
 
				+#         "results": [
			
 
				+#             {
			
 
				+#                 "product_id": "prod_001",
			
 
				+#                 "mandatory": {...},
			
 
				+#                 "additional": {...}
			
 
				+#             },
			
 
				+#             {
			
 
				+#                 "product_id": "prod_002",
			
 
				+#                 "mandatory": {...},
			
 
				+#                 "additional": {...}
			
 
				+#             }
			
 
				+#         ],
			
 
				+#         "total_products": 2,
			
 
				+#         "successful": 2,
			
 
				+#         "failed": 0
			
 
				+#     }
			
 
				+#     """
			
 
				+
			
 
				+#     def post(self, request):
			
 
				+#         serializer = BatchProductRequestSerializer(data=request.data)
			
 
				+#         if not serializer.is_valid():
			
 
				+#             return Response(
			
 
				+#                 {"error": serializer.errors},
			
 
				+#                 status=status.HTTP_400_BAD_REQUEST
			
 
				+#             )
			
 
				+
			
 
				+#         validated_data = serializer.validated_data
			
 
				+
			
 
				+#         # Extract attributes for all products in batch
			
 
				+#         result = ProductAttributeService.extract_attributes_batch(
			
 
				+#             products=validated_data['products'],
			
 
				+#             mandatory_attrs=validated_data['mandatory_attrs'],
			
 
				+#             model=validated_data.get('model'),
			
 
				+#             extract_additional=validated_data.get('extract_additional', True)
			
 
				+#         )
			
 
				+
			
 
				+#         response_serializer = BatchProductResponseSerializer(data=result)
			
 
				+#         if response_serializer.is_valid():
			
 
				+#             return Response(response_serializer.data, status=status.HTTP_200_OK)
			
 
				+        
			
 
				+#         return Response(result, status=status.HTTP_200_OK)
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+# ==================== views.py ====================
			
 
				+from rest_framework.views import APIView
			
 
				+from rest_framework.response import Response
			
 
				+from rest_framework import status
			
 
				+from .serializers import (
			
 
				+    SingleProductRequestSerializer,
			
 
				+    BatchProductRequestSerializer,
			
 
				+    ProductAttributeResultSerializer,
			
 
				+    BatchProductResponseSerializer
			
 
				+)
			
 
				+from .services import ProductAttributeService
			
 
				+from .ocr_service import OCRService
			
 
				+
			
 
				+
			
 
				+class ExtractProductAttributesView(APIView):
			
 
				+    """
			
 
				+    API endpoint to extract product attributes for a single product.
			
 
				+    Now supports image URL for OCR-based text extraction.
			
 
				+    """
			
 
				+
			
 
				+    def post(self, request):
			
 
				+        serializer = SingleProductRequestSerializer(data=request.data)
			
 
				+        if not serializer.is_valid():
			
 
				+            return Response(
			
 
				+                {"error": serializer.errors},
			
 
				+                status=status.HTTP_400_BAD_REQUEST
			
 
				+            )
			
 
				+
			
 
				+        validated_data = serializer.validated_data
			
 
				+        
			
 
				+        # Process image if URL provided
			
 
				+        ocr_results = None
			
 
				+        ocr_text = None
			
 
				+        
			
 
				+        if validated_data.get('process_image', True) and validated_data.get('image_url'):
			
 
				+            ocr_service = OCRService()
			
 
				+            ocr_results = ocr_service.process_image(validated_data['image_url'])
			
 
				+            
			
 
				+            # Extract attributes from OCR
			
 
				+            if ocr_results and ocr_results.get('detected_text'):
			
 
				+                ocr_attrs = ProductAttributeService.extract_attributes_from_ocr(
			
 
				+                    ocr_results,
			
 
				+                    validated_data.get('model')
			
 
				+                )
			
 
				+                ocr_results['extracted_attributes'] = ocr_attrs
			
 
				+                
			
 
				+                # Format OCR text
			
 
				+                ocr_text = "\n".join([
			
 
				+                    f"{item['text']} (confidence: {item['confidence']:.2f})"
			
 
				+                    for item in ocr_results['detected_text']
			
 
				+                ])
			
 
				+
			
 
				+        # Combine all product information
			
 
				+        product_text = ProductAttributeService.combine_product_text(
			
 
				+            title=validated_data.get('title'),
			
 
				+            short_desc=validated_data.get('short_desc'),
			
 
				+            long_desc=validated_data.get('long_desc'),
			
 
				+            ocr_text=ocr_text
			
 
				+        )
			
 
				+
			
 
				+        # Extract attributes
			
 
				+        result = ProductAttributeService.extract_attributes(
			
 
				+            product_text=product_text,
			
 
				+            mandatory_attrs=validated_data['mandatory_attrs'],
			
 
				+            model=validated_data.get('model'),
			
 
				+            extract_additional=validated_data.get('extract_additional', True)
			
 
				+        )
			
 
				+        
			
 
				+        # Add OCR results if available
			
 
				+        if ocr_results:
			
 
				+            result['ocr_results'] = ocr_results
			
 
				+
			
 
				+        response_serializer = ProductAttributeResultSerializer(data=result)
			
 
				+        if response_serializer.is_valid():
			
 
				+            return Response(response_serializer.data, status=status.HTTP_200_OK)
			
 
				+        
			
 
				+        return Response(result, status=status.HTTP_200_OK)
			
 
				+
			
 
				+
			
 
				+class BatchExtractProductAttributesView(APIView):
			
 
				+    """
			
 
				+    API endpoint to extract product attributes for multiple products in batch.
			
 
				+    Now supports image URLs for OCR-based text extraction.
			
 
				+    """
			
 
				+
			
 
				+    def post(self, request):
			
 
				+        serializer = BatchProductRequestSerializer(data=request.data)
			
 
				+        if not serializer.is_valid():
			
 
				+            return Response(
			
 
				+                {"error": serializer.errors},
			
 
				+                status=status.HTTP_400_BAD_REQUEST
			
 
				+            )
			
 
				+
			
 
				+        validated_data = serializer.validated_data
			
 
				+
			
 
				+        # Extract attributes for all products in batch
			
 
				+        result = ProductAttributeService.extract_attributes_batch(
			
 
				+            products=validated_data['products'],
			
 
				+            mandatory_attrs=validated_data['mandatory_attrs'],
			
 
				+            model=validated_data.get('model'),
			
 
				+            extract_additional=validated_data.get('extract_additional', True),
			
 
				+            process_image=validated_data.get('process_image', True)
			
 
				+        )
			
 
				+
			
 
				+        response_serializer = BatchProductResponseSerializer(data=result)
			
 
				+        if response_serializer.is_valid():
			
 
				+            return Response(response_serializer.data, status=status.HTTP_200_OK)
			
 
				+        
			
 
				+        return Response(result, status=status.HTTP_200_OK)
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+from rest_framework.views import APIView
			
 
				+from rest_framework.response import Response
			
 
				+from rest_framework import status
			
 
				+from .models import Product
			
 
				+from .serializers import ProductSerializer
			
 
				+
			
 
				+class ProductListView(APIView):
			
 
				+    """
			
 
				+    GET API to list all products with details
			
 
				+    """
			
 
				+    def get(self, request):
			
 
				+        products = Product.objects.all()
			
 
				+        serializer = ProductSerializer(products, many=True)
			
 
				+        return Response(serializer.data, status=status.HTTP_200_OK)
			
 
				+
			
--- a/content_quality_tool/__pycache__/settings.cpython-313.pyc
+++ b/content_quality_tool/__pycache__/settings.cpython-313.pyc
--- a/content_quality_tool/__pycache__/urls.cpython-313.pyc
+++ b/content_quality_tool/__pycache__/urls.cpython-313.pyc
--- a/content_quality_tool/settings.py
+++ b/content_quality_tool/settings.py
@@ -29,6 +29,7 @@ INSTALLED_APPS = [
 
				     'django.contrib.staticfiles',
			
 
				     'core',
			
 
				     'rest_framework',
			
 
				+    'attr_extraction',
			
 
				 ]
			
 
				 MIDDLEWARE = [
			
 
				     'django.middleware.security.SecurityMiddleware',
			
@@ -118,3 +119,7 @@ MESSAGE_TAGS = {
 
				 
			
 
				 
			
 
				 
			
 
				+GROQ_API_KEY = "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"
			
 
				+GROQ_API_URL = "https://api.groq.com/openai/v1/chat/completions"
			
 
				+SUPPORTED_MODELS = ["llama-3.1-8b-instant", "llama-3.3-70b-versatile", "mixtral-8x7b-32768"]
			
 
				+MAX_BATCH_SIZE = 100  # Maximum products per batch request
			
--- a/content_quality_tool/urls.py
+++ b/content_quality_tool/urls.py
@@ -29,6 +29,7 @@ urlpatterns = [
 
				 
			
 
				     # api url
			
 
				     path("core/", include("core.urls")),
			
 
				+    path("attr/", include("attr_extraction.urls")),
			
 
				     # path("", views.login_view, name="login_view"),
			
 
				 ]
			
 
				 
			
--- a/core/__pycache__/models.cpython-313.pyc
+++ b/core/__pycache__/models.cpython-313.pyc
--- a/core/services/__pycache__/attribute_scorer.cpython-313.pyc
+++ b/core/services/__pycache__/attribute_scorer.cpython-313.pyc
--- a/db.sqlite3
+++ b/db.sqlite3