Harshit Pathak, 3 months ago
Parent
Current commit dbceaba7d7

+ 0 - 0
attr_extraction/__init__.py


+ 0 - 3
attr_extraction/admin.py

@@ -1,3 +0,0 @@
-from django.contrib import admin
-
-# Register your models here.

+ 0 - 6
attr_extraction/apps.py

@@ -1,6 +0,0 @@
-from django.apps import AppConfig
-
-
-class AttrExtractionConfig(AppConfig):
-    default_auto_field = 'django.db.models.BigAutoField'
-    name = 'attr_extraction'

+ 0 - 0
attr_extraction/migrations/__init__.py


+ 0 - 54
attr_extraction/models.py

@@ -1,54 +0,0 @@
-# models.py
-from django.db import models
-
-class Product(models.Model):
-    title = models.CharField(max_length=500)
-    description = models.TextField()
-    short_description = models.TextField(blank=True)
-    attributes_extracted = models.BooleanField(default=False)
-    created_at = models.DateTimeField(auto_now_add=True)
-    updated_at = models.DateTimeField(auto_now=True)
-    
-    class Meta:
-        db_table = 'products'
-        indexes = [
-            models.Index(fields=['attributes_extracted', 'created_at']),
-        ]
-
-class ProductImage(models.Model):
-    product = models.ForeignKey(Product, related_name='images', on_delete=models.CASCADE)
-    image = models.ImageField(upload_to='products/')
-    order = models.PositiveIntegerField(default=0)
-    
-    class Meta:
-        db_table = 'product_images'
-        ordering = ['order']
-
-class ProductAttribute(models.Model):
-    product = models.ForeignKey(Product, related_name='attributes', on_delete=models.CASCADE)
-    attribute_name = models.CharField(max_length=100, db_index=True)
-    attribute_value = models.TextField()
-    confidence_score = models.FloatField(default=0.0)
-    extraction_method = models.CharField(
-        max_length=20,
-        choices=[('nlp', 'NLP'), ('llm', 'LLM'), ('hybrid', 'Hybrid')],
-        default='hybrid'
-    )
-    needs_review = models.BooleanField(default=False)
-    reviewed = models.BooleanField(default=False)
-    created_at = models.DateTimeField(auto_now_add=True)
-    
-    class Meta:
-        db_table = 'product_attributes'
-        unique_together = ['product', 'attribute_name']
-        indexes = [
-            models.Index(fields=['attribute_name', 'confidence_score']),
-            models.Index(fields=['needs_review', 'reviewed']),
-        ]
-    
-    def save(self, *args, **kwargs):
-        # Auto-flag low confidence for review
-        if self.confidence_score < 0.7:
-            self.needs_review = True
-        super().save(*args, **kwargs)

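The overridden save() above silently flags any attribute whose confidence score falls below 0.7 for human review. A minimal usage sketch of that behaviour, assuming product is an existing Product instance:

# Sketch only: illustrates the auto-flagging in ProductAttribute.save()
attr = ProductAttribute(
    product=product,               # existing Product instance (assumed)
    attribute_name='color',
    attribute_value='black',
    confidence_score=0.55,         # below the 0.7 review threshold
    extraction_method='nlp',
)
attr.save()
assert attr.needs_review is True
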
+ 0 - 13
attr_extraction/serializers.py

@@ -1,13 +0,0 @@
-# serializers.py
-from rest_framework import serializers
-from .models import ProductAttribute
-
-class ProductAttributeSerializer(serializers.ModelSerializer):
-    product_title = serializers.CharField(source='product.title', read_only=True)
-    
-    class Meta:
-        model = ProductAttribute
-        fields = ['id', 'product', 'product_title', 'attribute_name', 
-                  'attribute_value', 'confidence_score', 'extraction_method',
-                  'needs_review', 'reviewed', 'created_at']
-

+ 0 - 322
attr_extraction/services/attribute_extractor.py

@@ -1,322 +0,0 @@
-# services/attribute_extractor.py
-import re
-import spacy
-from typing import Dict, List, Optional
-from anthropic import Anthropic
-import base64
-from PIL import Image
-import pytesseract
-from collections import defaultdict
-
-class HybridAttributeExtractor:
-    """
-    Hybrid extractor using NLP for structured data and LLM for complex/ambiguous cases
-    """
-    
-    def __init__(self, anthropic_api_key: str, product_type_mappings: Dict = None):
-        self.nlp = spacy.load("en_core_web_sm")
-        self.client = Anthropic(api_key=anthropic_api_key)
-        self.product_type_mappings = product_type_mappings or self._load_default_mappings()
-        
-        # Define patterns for common attributes
-        self.patterns = {
-            'size': [
-                r'\b(XXS|XS|S|M|L|XL|XXL|XXXL)\b',
-                r'\b(\d+(?:\.\d+)?)\s*(inch|inches|cm|mm|meter|metres?|ft|feet|")\b',
-                r'\b(small|medium|large|extra large)\b'
-            ],
-            'color': [
-                r'\b(black|white|red|blue|green|yellow|orange|purple|pink|brown|gray|grey|silver|gold|beige|navy|maroon|olive|teal|turquoise|lavender|cream|ivory)\b'
-            ],
-            'weight': [
-                r'\b(\d+(?:\.\d+)?)\s*(kg|g|lb|lbs|oz|pounds?|grams?|kilograms?)\b'
-            ],
-            'material': [
-                r'\b(cotton|polyester|silk|wool|leather|denim|linen|nylon|spandex|rayon|acrylic|metal|plastic|wood|glass|ceramic|steel|aluminum|rubber)\b'
-            ],
-            'brand': [
-                r'(?:by|from|brand:?)\s+([A-Z][a-zA-Z0-9\s&]+?)(?:\s|$|,|\.|;)'
-            ]
-        }
-        
-        # Confidence thresholds
-        self.confidence_threshold = 0.6
-        
-    def extract_attributes(self, product_data: Dict) -> Dict:
-        """
-        Main extraction method - uses NLP first, LLM for gaps
-        """
-        # Phase 1: Quick NLP extraction
-        nlp_attributes = self._extract_with_nlp(
-            product_data.get('title', ''),
-            product_data.get('description', '')
-        )
-        
-        # Phase 2: OCR from images if provided
-        ocr_text = ""
-        if product_data.get('images'):
-            ocr_text = self._extract_text_from_images(product_data['images'])
-            if ocr_text:
-                ocr_attributes = self._extract_with_nlp("", ocr_text)
-                nlp_attributes = self._merge_attributes(nlp_attributes, ocr_attributes)
-        
-        # Phase 3: Always call LLM to enrich and validate NLP results
-        llm_attributes = self._extract_with_llm(
-            product_data,
-            nlp_attributes,
-            ocr_text
-        )
-        final_attributes = self._merge_attributes(nlp_attributes, llm_attributes)
-        
-        return final_attributes
-    
-    def _extract_with_nlp(self, title: str, description: str) -> Dict:
-        """
-        Fast extraction using regex and spaCy
-        """
-        text = f"{title} {description}".lower()
-        attributes = defaultdict(list)
-        
-        # Pattern matching for structured attributes
-        for attr_type, patterns in self.patterns.items():
-            for pattern in patterns:
-                matches = re.finditer(pattern, text, re.IGNORECASE)
-                for match in matches:
-                    value = match.group(1) if match.groups() else match.group(0)
-                    attributes[attr_type].append(value.strip())
-        
-        # Named Entity Recognition for brands, organizations
-        doc = self.nlp(title + " " + description)
-        for ent in doc.ents:
-            if ent.label_ == "ORG" and 'brand' not in attributes:
-                attributes['brand'].append(ent.text)
-            elif ent.label_ == "PRODUCT":
-                attributes['product_type'].append(ent.text)
-            elif ent.label_ == "MONEY":
-                attributes['price'].append(ent.text)
-        
-        # Deduplicate and clean
-        cleaned_attributes = {}
-        for key, values in attributes.items():
-            if values:
-                # Collapse to one value when all matches agree; otherwise keep the list at lower confidence
-                cleaned_attributes[key] = list(set(values))[0] if len(set(values)) == 1 else values
-                cleaned_attributes[f'{key}_confidence'] = 0.8 if len(set(values)) == 1 else 0.5
-        
-        return cleaned_attributes
-    
-    def _extract_text_from_images(self, image_paths: List[str]) -> str:
-        """
-        Extract text from product images using OCR
-        """
-        extracted_text = []
-        
-        for img_path in image_paths[:3]:  # Limit to 3 images
-            try:
-                img = Image.open(img_path)
-                text = pytesseract.image_to_string(img)
-                if text.strip():
-                    extracted_text.append(text.strip())
-            except Exception as e:
-                print(f"OCR error for {img_path}: {e}")
-        
-        return " ".join(extracted_text)
-    
-    def _needs_llm_extraction(self, attributes: Dict, product_data: Dict) -> bool:
-        """
-        Determine if LLM extraction is needed based on confidence and completeness
-        """
-        # Check if critical attributes are missing
-        critical_attrs = ['category', 'brand', 'color', 'size']
-        missing_critical = any(attr not in attributes for attr in critical_attrs)
-        
-        # Check confidence levels
-        low_confidence = any(
-            attributes.get(f'{key}_confidence', 0) < self.confidence_threshold
-            for key in attributes.keys() if not key.endswith('_confidence')
-        )
-        
-        # Check if description is complex/unstructured
-        description = product_data.get('description', '')
-        is_complex = len(description.split()) > 100 or 'features' in description.lower()
-        
-        return missing_critical or low_confidence or is_complex
-    
-    def _extract_with_llm(self, product_data: Dict, existing_attrs: Dict, ocr_text: str) -> Dict:
-        """
-        Use LLM to extract comprehensive attributes and validate NLP results
-        """
-        prompt = f"""Analyze this product and extract ALL possible attributes with high accuracy.
-
-Title: {product_data.get('title', 'N/A')}
-Description: {product_data.get('description', 'N/A')}
-Short Description: {product_data.get('short_description', 'N/A')}
-Text from images (OCR): {ocr_text if ocr_text else 'N/A'}
-
-NLP Pre-extracted attributes (validate and enhance): {existing_attrs}
-
-Extract a comprehensive JSON object with these fields (include all that apply):
-
-**Basic Info:**
-- category: specific product category/type
-- subcategory: more specific classification
-- brand: brand name
-- model: model number/name
-- product_line: product series/collection
-
-**Physical Attributes:**
-- color: all colors (list if multiple)
-- size: size information (with units)
-- dimensions: length/width/height with units
-- weight: weight with units
-- material: materials used (list all)
-- finish: surface finish/texture
-
-**Technical Specs (if applicable):**
-- specifications: key technical specs as object
-- compatibility: what it works with
-- capacity: storage/volume capacity
-- power: power requirements/battery info
-
-**Commercial Info:**
-- condition: new/used/refurbished
-- warranty: warranty information
-- country_of_origin: manufacturing country
-- certifications: safety/quality certifications
-
-**Descriptive:**
-- key_features: list of 5-8 main features
-- benefits: main benefits/use cases
-- target_audience: who this is for
-- usage_instructions: how to use (if mentioned)
-- care_instructions: care/maintenance info
-- style: style/aesthetic (modern, vintage, etc)
-- season: seasonal relevance (if applicable)
-- occasion: suitable occasions (if applicable)
-
-**Additional:**
-- package_contents: what's included
-- variants: available variants/options
-- tags: relevant search tags (list)
-
-Only include fields where you have high confidence. Use null for uncertain values.
-For lists, provide all relevant items. Be thorough and extract every possible detail."""
-
-        content = [{"type": "text", "text": prompt}]
-        
-        # Add images if available
-        if product_data.get('images'):
-            for img_path in product_data['images'][:3]:  # Include up to 3 images for better context
-                try:
-                    with open(img_path, 'rb') as f:
-                        img_data = base64.b64encode(f.read()).decode()
-                    
-                    # Determine media type
-                    media_type = "image/jpeg"
-                    if img_path.lower().endswith('.png'):
-                        media_type = "image/png"
-                    elif img_path.lower().endswith('.webp'):
-                        media_type = "image/webp"
-                    
-                    content.append({
-                        "type": "image",
-                        "source": {
-                            "type": "base64",
-                            "media_type": media_type,
-                            "data": img_data
-                        }
-                    })
-                except Exception as e:
-                    print(f"Error processing image {img_path}: {e}")
-        
-        try:
-            response = self.client.messages.create(
-                model="claude-sonnet-4-20250514",
-                max_tokens=2048,  # Increased for comprehensive extraction
-                messages=[{"role": "user", "content": content}]
-            )
-            
-            # Parse JSON response
-            import json
-            llm_result = json.loads(response.content[0].text)
-            
-            # Add high confidence to LLM results
-            for key in llm_result:
-                if llm_result[key] is not None:
-                    llm_result[f'{key}_confidence'] = 0.95
-            
-            return llm_result
-        
-        except Exception as e:
-            print(f"LLM extraction error: {e}")
-            return {}
-    
-    def _identify_missing_attributes(self, existing_attrs: Dict) -> List[str]:
-        """
-        Identify which attributes are missing or low confidence
-        """
-        important_attrs = ['category', 'brand', 'color', 'size', 'material', 'key_features']
-        missing = []
-        
-        for attr in important_attrs:
-            if attr not in existing_attrs or existing_attrs.get(f'{attr}_confidence', 0) < 0.7:
-                missing.append(attr)
-        
-        return missing
-    
-    def _merge_attributes(self, base: Dict, additional: Dict) -> Dict:
-        """
-        Intelligently merge attributes, preferring LLM for new attributes and validation
-        """
-        merged = {}
-        
-        # Start with all NLP attributes
-        for key, value in base.items():
-            if not key.endswith('_confidence'):
-                merged[key] = value
-                merged[f'{key}_confidence'] = base.get(f'{key}_confidence', 0.7)
-        
-        # Add or override with LLM attributes
-        for key, value in additional.items():
-            if key.endswith('_confidence'):
-                continue
-            
-            if value is None:
-                # Keep NLP value if LLM returns null
-                continue
-            
-            # LLM found new attribute or better value
-            if key not in merged:
-                merged[key] = value
-                merged[f'{key}_confidence'] = additional.get(f'{key}_confidence', 0.95)
-            else:
-                # Compare values - if different, prefer LLM but mark for review
-                llm_conf = additional.get(f'{key}_confidence', 0.95)
-                nlp_conf = merged.get(f'{key}_confidence', 0.7)
-                
-                if str(value).lower() != str(merged[key]).lower():
-                    # Values differ - use LLM but add conflict flag
-                    merged[key] = value
-                    merged[f'{key}_confidence'] = llm_conf
-                    merged[f'{key}_nlp_value'] = base.get(key)  # Store NLP value for reference
-                    merged[f'{key}_conflict'] = True
-                else:
-                    # Values match - boost confidence
-                    merged[key] = value
-                    merged[f'{key}_confidence'] = min(0.99, (llm_conf + nlp_conf) / 2 + 0.1)
-        
-        return merged
-
-
-# Example usage
-if __name__ == "__main__":
-    extractor = HybridAttributeExtractor(anthropic_api_key="your-api-key")
-    
-    product = {
-        'title': 'Nike Air Max 270 Running Shoes - Black/White',
-        'description': 'Premium running shoes with Max Air cushioning. Breathable mesh upper, rubber outsole. Perfect for daily training.',
-        'images': ['path/to/image1.jpg', 'path/to/image2.jpg']
-    }
-    
-    attributes = extractor.extract_attributes(product)
-    print(attributes)

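One fragile spot in _extract_with_llm above: json.loads(response.content[0].text) assumes the model returns bare JSON, but replies are sometimes wrapped in a Markdown fence or preceded by prose. A defensive parsing helper is sketched below; parse_llm_json is a hypothetical name, not part of the original module:

import json
import re

def parse_llm_json(raw_text: str) -> dict:
    """Best-effort extraction of a JSON object from an LLM reply."""
    # Strip a Markdown code fence if the model wrapped the JSON in one.
    cleaned = re.sub(r'^```(?:json)?\s*|\s*```$', '', raw_text.strip())
    try:
        return json.loads(cleaned)
    except json.JSONDecodeError:
        # Fall back to the first {...} block found in the text.
        match = re.search(r'\{.*\}', cleaned, re.DOTALL)
        return json.loads(match.group(0)) if match else {}
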
+ 0 - 78
attr_extraction/tasks.py

@@ -1,78 +0,0 @@
-# tasks.py
-import json
-from celery import shared_task
-from django.conf import settings
-from django.core.cache import cache
-from .models import Product, ProductAttribute
-from .services.attribute_extractor import HybridAttributeExtractor
-
-@shared_task(bind=True, max_retries=3)
-def extract_product_attributes(self, product_id: int):
-    """
-    Celery task to extract attributes from a product
-    """
-    try:
-        product = Product.objects.get(id=product_id)
-        
-        # Check cache first
-        cache_key = f"product_attrs_{product.id}_{product.updated_at.timestamp()}"
-        cached_attrs = cache.get(cache_key)
-        
-        if cached_attrs:
-            return cached_attrs
-        
-        # Prepare product data
-        product_data = {
-            'title': product.title,
-            'description': product.description,
-            'short_description': product.short_description,
-            'images': [img.image.path for img in product.images.all()]
-        }
-        
-        # Extract attributes
-        extractor = HybridAttributeExtractor(
-            anthropic_api_key=settings.ANTHROPIC_API_KEY
-        )
-        attributes = extractor.extract_attributes(product_data)
-        
-        # Save to database
-        for attr_name, attr_value in attributes.items():
-            if not attr_name.endswith('_confidence'):
-                confidence = attributes.get(f'{attr_name}_confidence', 0.5)
-                
-                ProductAttribute.objects.update_or_create(
-                    product=product,
-                    attribute_name=attr_name,
-                    defaults={
-                        'attribute_value': json.dumps(attr_value) if isinstance(attr_value, (list, dict)) else str(attr_value),
-                        'confidence_score': confidence,
-                        'extraction_method': 'hybrid'
-                    }
-                )
-        
-        # Cache for 24 hours
-        cache.set(cache_key, attributes, 86400)
-        
-        # Update product status
-        product.attributes_extracted = True
-        product.save()
-        
-        return attributes
-        
-    except Product.DoesNotExist:
-        return {'error': 'Product not found'}
-    except Exception as e:
-        # Retry with exponential backoff
-        raise self.retry(exc=e, countdown=60 * (2 ** self.request.retries))
-
-
-@shared_task
-def batch_extract_attributes(product_ids: list):
-    """
-    Process multiple products in batch
-    """
-    results = {}
-    for product_id in product_ids:
-        result = extract_product_attributes.delay(product_id)
-        results[product_id] = result.id
-    return results

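The task above is only enqueued explicitly from the API views further down. One possible way to trigger extraction automatically when a product is created is a post_save signal; this is a sketch of an addition, not something present in the deleted app, and it would still need to be imported from AttrExtractionConfig.ready() to be registered:

# signals.py (hypothetical addition)
from django.db import transaction
from django.db.models.signals import post_save
from django.dispatch import receiver

from .models import Product
from .tasks import extract_product_attributes

@receiver(post_save, sender=Product)
def queue_attribute_extraction(sender, instance, created, **kwargs):
    # Enqueue only for new products, and only after the row is committed.
    if created and not instance.attributes_extracted:
        transaction.on_commit(lambda: extract_product_attributes.delay(instance.id))
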
+ 0 - 3
attr_extraction/tests.py

@@ -1,3 +0,0 @@
-from django.test import TestCase
-
-# Create your tests here.

+ 0 - 15
attr_extraction/urls.py

@@ -1,15 +0,0 @@
-from django.urls import path
-from .views import (
-    ExtractAttributesView, 
-    BatchExtractAttributesView,
-    ProductAttributesView,
-    AttributeReviewView
-)
-
-urlpatterns = [
-    path('products/<int:product_id>/extract/', ExtractAttributesView.as_view()),
-    path('products/batch-extract/', BatchExtractAttributesView.as_view()),
-    path('products/<int:product_id>/attributes/', ProductAttributesView.as_view()),
-    path('attributes/review/', AttributeReviewView.as_view()),
-    path('attributes/<int:attribute_id>/review/', AttributeReviewView.as_view()),
-]

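These are app-level routes; the project's root urls.py still has to include them. A minimal sketch, assuming an api/ prefix (the actual prefix is not shown anywhere in this commit):

# content_quality_tool/urls.py (sketch)
from django.urls import include, path

urlpatterns = [
    # ... existing project routes
    path('api/', include('attr_extraction.urls')),
]
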
+ 0 - 99
attr_extraction/views.py

@@ -1,99 +0,0 @@
-from django.shortcuts import render
-
-# Create your views here.
-# views.py
-from rest_framework.views import APIView
-from rest_framework.response import Response
-from rest_framework import status
-from .tasks import extract_product_attributes, batch_extract_attributes
-from .models import Product, ProductAttribute
-from .serializers import ProductAttributeSerializer
-
-class ExtractAttributesView(APIView):
-    """
-    Trigger attribute extraction for a product
-    """
-    def post(self, request, product_id):
-        try:
-            product = Product.objects.get(id=product_id)
-            
-            # Trigger async task
-            task = extract_product_attributes.delay(product_id)
-            
-            return Response({
-                'message': 'Extraction started',
-                'task_id': task.id,
-                'product_id': product_id
-            }, status=status.HTTP_202_ACCEPTED)
-            
-        except Product.DoesNotExist:
-            return Response({'error': 'Product not found'}, status=status.HTTP_404_NOT_FOUND)
-
-
-class BatchExtractAttributesView(APIView):
-    """
-    Trigger batch extraction
-    """
-    def post(self, request):
-        product_ids = request.data.get('product_ids', [])
-        
-        if not product_ids:
-            return Response({'error': 'No product IDs provided'}, status=status.HTTP_400_BAD_REQUEST)
-        
-        task_results = batch_extract_attributes.delay(product_ids)
-        
-        return Response({
-            'message': f'Batch extraction started for {len(product_ids)} products',
-            'task_id': task_results.id
-        }, status=status.HTTP_202_ACCEPTED)
-
-
-class ProductAttributesView(APIView):
-    """
-    Get extracted attributes for a product
-    """
-    def get(self, request, product_id):
-        try:
-            product = Product.objects.get(id=product_id)
-            attributes = ProductAttribute.objects.filter(product=product)
-            
-            serializer = ProductAttributeSerializer(attributes, many=True)
-            
-            return Response({
-                'product_id': product_id,
-                'attributes_extracted': product.attributes_extracted,
-                'attributes': serializer.data
-            })
-            
-        except Product.DoesNotExist:
-            return Response({'error': 'Product not found'}, status=status.HTTP_404_NOT_FOUND)
-
-
-class AttributeReviewView(APIView):
-    """
-    Review and update low-confidence attributes
-    """
-    def get(self, request):
-        # Get attributes needing review
-        attributes = ProductAttribute.objects.filter(
-            needs_review=True,
-            reviewed=False
-        ).select_related('product')[:50]
-        
-        serializer = ProductAttributeSerializer(attributes, many=True)
-        return Response(serializer.data)
-    
-    def patch(self, request, attribute_id):
-        try:
-            attribute = ProductAttribute.objects.get(id=attribute_id)
-            
-            # Update attribute
-            attribute.attribute_value = request.data.get('attribute_value', attribute.attribute_value)
-            attribute.reviewed = True
-            attribute.confidence_score = 1.0  # Human verified
-            attribute.save()
-            
-            return Response({'message': 'Attribute updated'})
-            
-        except ProductAttribute.DoesNotExist:
-            return Response({'error': 'Attribute not found'}, status=status.HTTP_404_NOT_FOUND)

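tests.py above was only the empty Django stub. A minimal read-path test for ProductAttributesView might look like the sketch below; it assumes the hypothetical api/ include from the urls.py note above and exercises only the GET endpoint, since the extraction views would additionally need Celery stubbed out:

# tests.py (sketch)
from django.test import TestCase

from .models import Product, ProductAttribute

class ProductAttributesViewTests(TestCase):
    def test_returns_extracted_attributes(self):
        product = Product.objects.create(title='Test shoe', description='A running shoe')
        ProductAttribute.objects.create(
            product=product,
            attribute_name='color',
            attribute_value='black',
            confidence_score=0.9,
        )
        response = self.client.get(f'/api/products/{product.id}/attributes/')
        self.assertEqual(response.status_code, 200)
        self.assertEqual(len(response.json()['attributes']), 1)
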
+ 0 - 30
celery.py

@@ -1,30 +0,0 @@
-# celery.py (in your project root)
-import os
-from celery import Celery
-
-os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'your_project.settings')
-
-app = Celery('your_project')
-app.config_from_object('django.conf:settings', namespace='CELERY')
-app.autodiscover_tasks()
-
-
-# settings.py additions
-CELERY_BROKER_URL = 'redis://localhost:6379/0'
-CELERY_RESULT_BACKEND = 'redis://localhost:6379/0'
-CELERY_TASK_SERIALIZER = 'json'
-CELERY_ACCEPT_CONTENT = ['json']
-CELERY_RESULT_SERIALIZER = 'json'
-CELERY_TIMEZONE = 'UTC'
-
-CACHES = {
-    'default': {
-        'BACKEND': 'django_redis.cache.RedisCache',
-        'LOCATION': 'redis://127.0.0.1:6379/1',
-        'OPTIONS': {
-            'CLIENT_CLASS': 'django_redis.client.DefaultClient',
-        }
-    }
-}
-
-ANTHROPIC_API_KEY = os.environ.get('ANTHROPIC_API_KEY')

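For app.autodiscover_tasks() to run when Django boots, the conventional Celery setup also loads the app from the project package's __init__.py. That file is not part of this commit, so the following is the standard pattern rather than what the repository actually contained:

# your_project/__init__.py (conventional Celery bootstrap, assumed)
from .celery import app as celery_app

__all__ = ('celery_app',)

# A worker would then be started with, e.g.:
#   celery -A your_project worker -l info
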
+ 2 - 21
content_quality_tool/settings.py

@@ -29,7 +29,6 @@ INSTALLED_APPS = [
     'django.contrib.staticfiles',
     'core',
     'rest_framework',
-    'attr_extraction',
 ]
 MIDDLEWARE = [
     'django.middleware.security.SecurityMiddleware',
@@ -104,6 +103,8 @@ MEDIA_ROOT = BASE_DIR / 'media'
 MEDIA_URL = '/media/'
 from django.conf import settings
 from django.conf.urls.static import static
+
+
 urlpatterns = [
     # ... your routes
 ] + static(settings.MEDIA_URL, document_root=settings.MEDIA_ROOT)
@@ -117,23 +118,3 @@ MESSAGE_TAGS = {
 
 
 
-
-# settings.py additions
-CELERY_BROKER_URL = 'redis://localhost:6379/0'
-CELERY_RESULT_BACKEND = 'redis://localhost:6379/0'
-CELERY_TASK_SERIALIZER = 'json'
-CELERY_ACCEPT_CONTENT = ['json']
-CELERY_RESULT_SERIALIZER = 'json'
-CELERY_TIMEZONE = 'UTC'
-
-CACHES = {
-    'default': {
-        'BACKEND': 'django_redis.cache.RedisCache',
-        'LOCATION': 'redis://127.0.0.1:6379/1',
-        'OPTIONS': {
-            'CLIENT_CLASS': 'django_redis.client.DefaultClient',
-        }
-    }
-}
-
-ANTHROPIC_API_KEY = os.environ.get('ANTHROPIC_API_KEY')