@@ -1,322 +0,0 @@
-# services/attribute_extractor.py
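-
-"""
-Hybrid product attribute extraction.
-
-Pipeline (see HybridAttributeExtractor.extract_attributes): a regex/spaCy pass
-over the title and description, an optional OCR pass over product images, an
-LLM enrichment pass via the Anthropic API, and a merge step that records
-per-attribute confidence scores and conflicts.
-"""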
-
-import re
-import spacy
-from typing import Dict, List, Optional
-from anthropic import Anthropic
-import base64
-from PIL import Image
-import pytesseract
-from collections import defaultdict
-
-
-class HybridAttributeExtractor:
-    """
-    Hybrid extractor: regex/spaCy NLP for structured fields, an LLM for complex
-    or ambiguous cases. Every extracted attribute is stored alongside a
-    '<attribute>_confidence' score (0.8/0.5 for NLP matches, 0.95 for LLM values).
-    """
-
-    def __init__(self, anthropic_api_key: str, product_type_mappings: Optional[Dict] = None):
-        self.nlp = spacy.load("en_core_web_sm")
-        self.client = Anthropic(api_key=anthropic_api_key)
-        self.product_type_mappings = product_type_mappings or self._load_default_mappings()
-
-        # Regex patterns for common structured attributes
-        self.patterns = {
-            'size': [
-                r'\b(XXS|XS|S|M|L|XL|XXL|XXXL)\b',
-                r'\b(\d+(?:\.\d+)?)\s*(inch|inches|cm|mm|meter|metres?|ft|feet|")\b',
-                r'\b(small|medium|large|extra large)\b'
-            ],
-            'color': [
-                r'\b(black|white|red|blue|green|yellow|orange|purple|pink|brown|gray|grey|silver|gold|beige|navy|maroon|olive|teal|turquoise|lavender|cream|ivory)\b'
-            ],
-            'weight': [
-                r'\b(\d+(?:\.\d+)?)\s*(kg|g|lb|lbs|oz|pounds?|grams?|kilograms?)\b'
-            ],
-            'material': [
-                r'\b(cotton|polyester|silk|wool|leather|denim|linen|nylon|spandex|rayon|acrylic|metal|plastic|wood|glass|ceramic|steel|aluminum|rubber)\b'
-            ],
-            'brand': [
-                r'(?:by|from|brand:?)\s+([A-Z][a-zA-Z0-9\s&]+?)(?:\s|$|,|\.|;)'
-            ]
-        }
-
-        # Minimum confidence used when deciding whether LLM extraction is needed
-        self.confidence_threshold = 0.6
-
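-    def _load_default_mappings(self) -> Dict:
-        # Minimal placeholder (assumed): __init__ references this helper but the
-        # original module never defined it, so the no-argument constructor would
-        # raise AttributeError. Returning an empty mapping keeps that path working.
-        return {}
-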
-    def extract_attributes(self, product_data: Dict) -> Dict:
-        """
-        Main extraction method: NLP first, then OCR on images, then an LLM pass
-        to enrich and validate the results.
-        """
-        # Phase 1: Quick NLP extraction from title and description
-        nlp_attributes = self._extract_with_nlp(
-            product_data.get('title', ''),
-            product_data.get('description', '')
-        )
-
-        # Phase 2: OCR from images, if provided
-        ocr_text = ""
-        if product_data.get('images'):
-            ocr_text = self._extract_text_from_images(product_data['images'])
-            if ocr_text:
-                ocr_attributes = self._extract_with_nlp("", ocr_text)
-                nlp_attributes = self._merge_attributes(nlp_attributes, ocr_attributes)
-
-        # Phase 3: Always call the LLM to enrich and validate the NLP results
-        llm_attributes = self._extract_with_llm(
-            product_data,
-            nlp_attributes,
-            ocr_text
-        )
-        final_attributes = self._merge_attributes(nlp_attributes, llm_attributes)
-
-        return final_attributes
-
-    def _extract_with_nlp(self, title: str, description: str) -> Dict:
-        """
-        Fast extraction using regex patterns and spaCy NER.
-        """
-        # Keep the original casing: the brand pattern relies on capitalization.
-        text = f"{title} {description}"
-        attributes = defaultdict(list)
-
-        # Pattern matching for structured attributes
-        for attr_type, patterns in self.patterns.items():
-            # Brand names are identified by their capitalization, so only the
-            # other patterns are matched case-insensitively.
-            flags = 0 if attr_type == 'brand' else re.IGNORECASE
-            for pattern in patterns:
-                matches = re.finditer(pattern, text, flags)
-                for match in matches:
-                    value = match.group(1) if match.groups() else match.group(0)
-                    attributes[attr_type].append(value.strip())
-
-        # Named Entity Recognition for brands, product types and prices
-        doc = self.nlp(text)
-        for ent in doc.ents:
-            if ent.label_ == "ORG" and 'brand' not in attributes:
-                attributes['brand'].append(ent.text)
-            elif ent.label_ == "PRODUCT":
-                attributes['product_type'].append(ent.text)
-            elif ent.label_ == "MONEY":
-                attributes['price'].append(ent.text)
-
-        # Deduplicate and clean
-        cleaned_attributes = {}
-        for key, values in attributes.items():
-            if values:
-                # Collapse to a single value when all matches agree; otherwise keep the list
-                cleaned_attributes[key] = list(set(values))[0] if len(set(values)) == 1 else values
-                cleaned_attributes[f'{key}_confidence'] = 0.8 if len(set(values)) == 1 else 0.5
-
-        return cleaned_attributes
-
-    def _extract_text_from_images(self, image_paths: List[str]) -> str:
-        """
-        Extract text from product images using OCR.
-        """
-        extracted_text = []
-
-        for img_path in image_paths[:3]:  # Limit to 3 images
-            try:
-                img = Image.open(img_path)
-                text = pytesseract.image_to_string(img)
-                if text.strip():
-                    extracted_text.append(text.strip())
-            except Exception as e:
-                print(f"OCR error for {img_path}: {e}")
-
-        return " ".join(extracted_text)
-
-    def _needs_llm_extraction(self, attributes: Dict, product_data: Dict) -> bool:
-        """
-        Determine whether LLM extraction is needed, based on confidence and
-        completeness. (Currently unused: extract_attributes always calls the LLM.)
-        """
-        # Check if critical attributes are missing
-        critical_attrs = ['category', 'brand', 'color', 'size']
-        missing_critical = any(attr not in attributes for attr in critical_attrs)
-
-        # Check confidence levels
-        low_confidence = any(
-            attributes.get(f'{key}_confidence', 0) < self.confidence_threshold
-            for key in attributes.keys() if not key.endswith('_confidence')
-        )
-
-        # Check if the description is complex/unstructured
-        description = product_data.get('description', '')
-        is_complex = len(description.split()) > 100 or 'features' in description.lower()
-
-        return missing_critical or low_confidence or is_complex
-
-    def _extract_with_llm(self, product_data: Dict, existing_attrs: Dict, ocr_text: str) -> Dict:
-        """
-        Use the LLM to extract comprehensive attributes and validate the NLP results.
-        """
-        prompt = f"""Analyze this product and extract ALL possible attributes with high accuracy.
-
-Title: {product_data.get('title', 'N/A')}
-Description: {product_data.get('description', 'N/A')}
-Short Description: {product_data.get('short_description', 'N/A')}
-Text from images (OCR): {ocr_text if ocr_text else 'N/A'}
-
-NLP pre-extracted attributes (validate and enhance): {existing_attrs}
-
-Extract a comprehensive JSON object with these fields (include all that apply):
-
-**Basic Info:**
-- category: specific product category/type
-- subcategory: more specific classification
-- brand: brand name
-- model: model number/name
-- product_line: product series/collection
-
-**Physical Attributes:**
-- color: all colors (list if multiple)
-- size: size information (with units)
-- dimensions: length/width/height with units
-- weight: weight with units
-- material: materials used (list all)
-- finish: surface finish/texture
-
-**Technical Specs (if applicable):**
-- specifications: key technical specs as an object
-- compatibility: what it works with
-- capacity: storage/volume capacity
-- power: power requirements/battery info
-
-**Commercial Info:**
-- condition: new/used/refurbished
-- warranty: warranty information
-- country_of_origin: manufacturing country
-- certifications: safety/quality certifications
-
-**Descriptive:**
-- key_features: list of 5-8 main features
-- benefits: main benefits/use cases
-- target_audience: who this is for
-- usage_instructions: how to use (if mentioned)
-- care_instructions: care/maintenance info
-- style: style/aesthetic (modern, vintage, etc.)
-- season: seasonal relevance (if applicable)
-- occasion: suitable occasions (if applicable)
-
-**Additional:**
-- package_contents: what's included
-- variants: available variants/options
-- tags: relevant search tags (list)
-
-Only include fields where you have high confidence. Use null for uncertain values.
-For lists, provide all relevant items. Be thorough and extract every possible detail.
-Respond with the JSON object only - no code fences and no commentary."""
-
-        content = [{"type": "text", "text": prompt}]
-
-        # Add images if available
-        if product_data.get('images'):
-            for img_path in product_data['images'][:3]:  # Include up to 3 images for better context
-                try:
-                    with open(img_path, 'rb') as f:
-                        img_data = base64.b64encode(f.read()).decode()
-
-                    # Determine media type from the file extension
-                    media_type = "image/jpeg"
-                    if img_path.lower().endswith('.png'):
-                        media_type = "image/png"
-                    elif img_path.lower().endswith('.webp'):
-                        media_type = "image/webp"
-
-                    content.append({
-                        "type": "image",
-                        "source": {
-                            "type": "base64",
-                            "media_type": media_type,
-                            "data": img_data
-                        }
-                    })
-                except Exception as e:
-                    print(f"Error processing image {img_path}: {e}")
-
-        try:
-            response = self.client.messages.create(
-                model="claude-sonnet-4-20250514",
-                max_tokens=2048,  # Increased for comprehensive extraction
-                messages=[{"role": "user", "content": content}]
-            )
-
-            # Parse the JSON response
-            import json
-            llm_result = json.loads(response.content[0].text)
-
-            # Attach a high confidence score to every non-null LLM value.
-            # Iterate over a snapshot of the keys: adding the *_confidence keys
-            # while iterating the dict directly would raise RuntimeError.
-            for key in list(llm_result):
-                if llm_result[key] is not None:
-                    llm_result[f'{key}_confidence'] = 0.95
-
-            return llm_result
-
-        except Exception as e:
-            print(f"LLM extraction error: {e}")
-            return {}
-
-    def _identify_missing_attributes(self, existing_attrs: Dict) -> List[str]:
-        """
-        Identify which important attributes are missing or low-confidence.
-        (Currently unused in this module.)
-        """
-        important_attrs = ['category', 'brand', 'color', 'size', 'material', 'key_features']
-        missing = []
-
-        for attr in important_attrs:
-            if attr not in existing_attrs or existing_attrs.get(f'{attr}_confidence', 0) < 0.7:
-                missing.append(attr)
-
-        return missing
-
-    def _merge_attributes(self, base: Dict, additional: Dict) -> Dict:
-        """
-        Merge attribute dicts, preferring LLM values for new attributes and for
-        conflicts, while recording the NLP value and a conflict flag for review.
-        """
-        merged = {}
-
-        # Start with all NLP attributes
-        for key, value in base.items():
-            if not key.endswith('_confidence'):
-                merged[key] = value
-                merged[f'{key}_confidence'] = base.get(f'{key}_confidence', 0.7)
-
-        # Add or override with LLM attributes
-        for key, value in additional.items():
-            if key.endswith('_confidence'):
-                continue
-
-            if value is None:
-                # Keep the NLP value if the LLM returns null
-                continue
-
-            if key not in merged:
-                # LLM found a new attribute
-                merged[key] = value
-                merged[f'{key}_confidence'] = additional.get(f'{key}_confidence', 0.95)
-            else:
-                # Both sources have a value - compare them
-                llm_conf = additional.get(f'{key}_confidence', 0.95)
-                nlp_conf = merged.get(f'{key}_confidence', 0.7)
-
-                if str(value).lower() != str(merged[key]).lower():
-                    # Values differ - use the LLM value but flag the conflict
-                    merged[key] = value
-                    merged[f'{key}_confidence'] = llm_conf
-                    merged[f'{key}_nlp_value'] = base.get(key)  # Store NLP value for reference
-                    merged[f'{key}_conflict'] = True
-                else:
-                    # Values match - boost confidence
-                    merged[key] = value
-                    merged[f'{key}_confidence'] = min(0.99, (llm_conf + nlp_conf) / 2 + 0.1)
-
-        return merged
-
-
-# Example usage
-if __name__ == "__main__":
-    extractor = HybridAttributeExtractor(anthropic_api_key="your-api-key")
-
-    product = {
-        'title': 'Nike Air Max 270 Running Shoes - Black/White',
-        'description': 'Premium running shoes with Max Air cushioning. Breathable mesh upper, rubber outsole. Perfect for daily training.',
-        'images': ['path/to/image1.jpg', 'path/to/image2.jpg']
-    }
-
-    attributes = extractor.extract_attributes(product)
-    print(attributes)