
Merge branch 'master' of https://git.luminad.com/harshit.pathak/content_quality_tool

VISHAL BHANUSHALI 3 months ago
Parent
Commit
2876f878ad

+ 38 - 0
attr_extraction/cache_config.py

@@ -0,0 +1,38 @@
+# ==================== cache_config.py ====================
+"""
+Centralized cache configuration for the application.
+Set ENABLE_CACHING to True to enable all caches, False to disable.
+"""
+
+# ⚡ MASTER CACHE CONTROL - Change this single variable to enable/disable ALL caching
+ENABLE_CACHING = False  # Default: OFF
+
+# Individual cache controls (controlled by ENABLE_CACHING)
+ENABLE_ATTRIBUTE_EXTRACTION_CACHE = ENABLE_CACHING
+ENABLE_EMBEDDING_CACHE = ENABLE_CACHING
+ENABLE_CLIP_MODEL_CACHE = ENABLE_CACHING
+
+# Cache size limits (only used when caching is enabled)
+ATTRIBUTE_CACHE_MAX_SIZE = 1000
+EMBEDDING_CACHE_MAX_SIZE = 500
+
+def is_caching_enabled() -> bool:
+    """
+    Check if caching is enabled globally.
+    Returns: bool indicating if caching is enabled
+    """
+    return ENABLE_CACHING
+
+def get_cache_config() -> dict:
+    """
+    Get current cache configuration.
+    Returns: dict with cache settings
+    """
+    return {
+        "master_cache_enabled": ENABLE_CACHING,
+        "attribute_extraction_cache": ENABLE_ATTRIBUTE_EXTRACTION_CACHE,
+        "embedding_cache": ENABLE_EMBEDDING_CACHE,
+        "clip_model_cache": ENABLE_CLIP_MODEL_CACHE,
+        "attribute_cache_max_size": ATTRIBUTE_CACHE_MAX_SIZE,
+        "embedding_cache_max_size": EMBEDDING_CACHE_MAX_SIZE
+    }

+ 0 - 209
attr_extraction/serializers.py

@@ -1,212 +1,3 @@
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-# # ==================== serializers.py ====================
-# from rest_framework import serializers
-# from .models import Product, ProductType, ProductAttribute, AttributePossibleValue
-
-
-# class ProductInputSerializer(serializers.Serializer):
-#     """Serializer for individual product input."""
-#     product_id = serializers.CharField(required=False, allow_blank=True, allow_null=True)
-#     title = serializers.CharField(required=False, allow_blank=True, allow_null=True)
-#     short_desc = serializers.CharField(required=False, allow_blank=True, allow_null=True)
-#     long_desc = serializers.CharField(required=False, allow_blank=True, allow_null=True)
-#     image_url = serializers.URLField(required=False, allow_blank=True, allow_null=True)
-
-
-# class MandatoryAttrsField(serializers.DictField):
-#     """Custom DictField to validate mandatory_attrs structure."""
-#     child = serializers.ListField(child=serializers.CharField())
-
-
-# class ProductBatchInputSerializer(serializers.Serializer):
-#     """Serializer for an individual product input within the batch request."""
-#     item_id = serializers.CharField(required=True)
-#     mandatory_attrs = MandatoryAttrsField(
-#         required=True,
-#         help_text="A dictionary of attribute names and their possible values."
-#     )
-
-
-# class SingleProductRequestSerializer(serializers.Serializer):
-#     """Serializer for single product extraction request."""
-#     item_id = serializers.CharField(required=True)
-#     mandatory_attrs = serializers.DictField(
-#         child=serializers.ListField(child=serializers.CharField()),
-#         required=True
-#     )
-#     model = serializers.CharField(required=False, default="llama-3.1-8b-instant")
-#     extract_additional = serializers.BooleanField(required=False, default=True)
-#     process_image = serializers.BooleanField(required=False, default=True)
-#     multiple = serializers.ListField(
-#         child=serializers.CharField(),
-#         required=False,
-#         default=list,
-#         help_text="List of attribute names that can have multiple values"
-#     )
-#     threshold_abs = serializers.FloatField(default=0.65, required=False)
-#     margin = serializers.FloatField(default=0.15, required=False)
-#     use_dynamic_thresholds = serializers.BooleanField(default=True, required=False)
-#     use_adaptive_margin = serializers.BooleanField(default=True, required=False)
-#     use_semantic_clustering = serializers.BooleanField(default=True, required=False)
-
-#     def validate_model(self, value):
-#         from django.conf import settings
-#         if value not in settings.SUPPORTED_MODELS:
-#             raise serializers.ValidationError(
-#                 f"Model must be one of {settings.SUPPORTED_MODELS}"
-#             )
-#         return value
-
-
-# class BatchProductRequestSerializer(serializers.Serializer):
-#     """Serializer for batch product extraction request (with item-specific attributes)."""
-#     products = serializers.ListField(
-#         child=ProductBatchInputSerializer(),
-#         required=True,
-#         min_length=1
-#     )
-#     model = serializers.CharField(required=False, default="llama-3.1-8b-instant")
-#     extract_additional = serializers.BooleanField(required=False, default=True)
-#     process_image = serializers.BooleanField(required=False, default=True)
-#     multiple = serializers.ListField(
-#         child=serializers.CharField(),
-#         required=False,
-#         default=list,
-#         help_text="List of attribute names that can have multiple values"
-#     )
-#     threshold_abs = serializers.FloatField(default=0.65, required=False)
-#     margin = serializers.FloatField(default=0.15, required=False)
-#     use_dynamic_thresholds = serializers.BooleanField(default=True, required=False)
-#     use_adaptive_margin = serializers.BooleanField(default=True, required=False)
-#     use_semantic_clustering = serializers.BooleanField(default=True, required=False)
-    
-#     def validate_model(self, value):
-#         from django.conf import settings
-#         if value not in settings.SUPPORTED_MODELS:
-#             raise serializers.ValidationError(
-#                 f"Model must be one of {settings.SUPPORTED_MODELS}"
-#             )
-#         return value
-    
-#     def validate_products(self, value):
-#         from django.conf import settings
-#         max_size = getattr(settings, 'MAX_BATCH_SIZE', 100)
-#         if len(value) > max_size:
-#             raise serializers.ValidationError(
-#                 f"Batch size cannot exceed {max_size} products"
-#             )
-#         return value
-
-
-# class OCRResultSerializer(serializers.Serializer):
-#     """Serializer for OCR results."""
-#     detected_text = serializers.ListField(child=serializers.DictField())
-#     extracted_attributes = serializers.DictField()
-
-
-# class ProductAttributeResultSerializer(serializers.Serializer):
-#     """Serializer for individual product extraction result."""
-#     product_id = serializers.CharField(required=False)
-#     mandatory = serializers.DictField()
-#     additional = serializers.DictField(required=False)
-#     ocr_results = OCRResultSerializer(required=False)
-#     error = serializers.CharField(required=False)
-#     raw_output = serializers.CharField(required=False)
-
-
-# class BatchProductResponseSerializer(serializers.Serializer):
-#     """Serializer for batch extraction response."""
-#     results = serializers.ListField(child=ProductAttributeResultSerializer())
-#     total_products = serializers.IntegerField()
-#     successful = serializers.IntegerField()
-#     failed = serializers.IntegerField()
-
-
-# class ProductSerializer(serializers.ModelSerializer):
-#     """Serializer for Product model with product type details."""
-#     product_type_details = serializers.SerializerMethodField()
-    
-#     class Meta:
-#         model = Product
-#         fields = [
-#             'id',
-#             'item_id',
-#             'product_name',
-#             'product_long_description',
-#             'product_short_description',
-#             'product_type',
-#             'image_path',
-#             'image',
-#             'product_type_details',
-#         ]
-
-#     def get_product_type_details(self, obj):
-#         """Fetch ProductType object and its attributes for this product."""
-#         try:
-#             product_type = ProductType.objects.get(name=obj.product_type)
-#         except ProductType.DoesNotExist:
-#             return []
-
-#         # Serialize its attributes
-#         attributes = ProductAttribute.objects.filter(product_type=product_type)
-#         return [
-#             {
-#                 "attribute_name": attr.name,
-#                 "is_mandatory": "Yes" if attr.is_mandatory else "No",
-#                 "possible_values": [pv.value for pv in attr.possible_values.all()]
-#             }
-#             for attr in attributes
-#         ]
-
-
-# class AttributePossibleValueSerializer(serializers.ModelSerializer):
-#     """Serializer for AttributePossibleValue model."""
-#     class Meta:
-#         model = AttributePossibleValue
-#         fields = ['value']
-
-
-# class ProductAttributeSerializer(serializers.ModelSerializer):
-#     """Serializer for ProductAttribute model with possible values."""
-#     possible_values = AttributePossibleValueSerializer(many=True, read_only=True)
-    
-#     class Meta:
-#         model = ProductAttribute
-#         fields = ['name', 'is_mandatory', 'possible_values']
-
-
-# class ProductTypeSerializer(serializers.ModelSerializer):
-#     """Serializer for ProductType model with attributes."""
-#     attributes = ProductAttributeSerializer(many=True, read_only=True)
-    
-#     class Meta:
-#         model = ProductType
-#         fields = ['name', 'attributes']
-
-
-
-
-
-
-
-        
-
-
-
 # ==================== Updated serializers.py ====================
 from rest_framework import serializers
 from .models import Product, ProductType, ProductAttribute, AttributePossibleValue

+ 1 - 2302
attr_extraction/services.py

File diff suppressed because it is too large

+ 0 - 17
attr_extraction/urls.py

@@ -1,20 +1,3 @@
-# # ==================== urls.py ====================
-# from django.urls import path
-# from .views import ExtractProductAttributesView,ProductTypeListView, ProductTypeAttributesView, ProductAttributesUploadView, BatchExtractProductAttributesView, ProductListView, ProductUploadExcelView
-
-# urlpatterns = [
-#     path('extract/', ExtractProductAttributesView.as_view(), name='extract-attributes'),
-#     path('batch-extract/', BatchExtractProductAttributesView.as_view(), name='batch-extract-attributes'),
-#     path('products/', ProductListView.as_view(), name='batch-extract-attributes'),
-#     path('products/upload-excel/', ProductUploadExcelView.as_view(), name='product-upload-excel'),
-#     path('products/upload-attributes/', ProductAttributesUploadView.as_view(), name='product-upload-excel'),
-#     path('products/attributes/', ProductTypeAttributesView.as_view(), name='product-upload-excel'),
-#     path('product-types/', ProductTypeListView.as_view(), name='product-types-list'),
-# ]
-
-
-
-
 # urls.py
 from django.urls import path
 from .views import (

+ 212 - 517
attr_extraction/views.py

@@ -1,40 +1,80 @@
+# ==================== Cleaned & Optimized Imports ====================
+
+import os
+import io
+import json
+import logging
+import threading
+from datetime import datetime
+import pandas as pd
+import concurrent.futures
+
+from django.conf import settings
+from django.http import HttpResponse
+from django.db import transaction
+from django.db.models import Prefetch
+
 from rest_framework.views import APIView
 from rest_framework.response import Response
 from rest_framework import status
 from rest_framework.parsers import MultiPartParser, FormParser
-from django.db import transaction
-import pandas as pd
-from .models import Product, ProductType, ProductAttribute, AttributePossibleValue
+
+from openpyxl import Workbook
+from openpyxl.styles import Font, PatternFill, Alignment
+
+
+from . import cache_config
+
+# --- Local imports ---
+from .models import (
+    Product,
+    ProductType,
+    ProductAttribute,
+    ProductAttributeValue,
+    AttributePossibleValue,
+)
 from .serializers import (
-    SingleProductRequestSerializer,
-    BatchProductRequestSerializer,
-    ProductAttributeResultSerializer, 
-    BatchProductResponseSerializer,
     ProductSerializer,
     ProductTypeSerializer,
     ProductAttributeSerializer,
-    AttributePossibleValueSerializer
+    AttributePossibleValueSerializer,
+    SingleProductRequestSerializer,
+    BatchProductRequestSerializer,
+    ProductAttributeResultSerializer,
+    BatchProductResponseSerializer,
+    ProductAttributeValueSerializer,
+    ProductAttributeValueInputSerializer,
+    BulkProductAttributeValueSerializer,
+    ProductWithAttributesSerializer,
 )
 from .services import ProductAttributeService
 from .ocr_service import OCRService
+from .visual_processing_service import VisualProcessingService
 
+# --- Configuration for Generated Outputs Folder ---
+OUTPUT_FOLDER_NAME = 'generated_outputs'
+OUTPUT_ROOT = os.path.join(settings.MEDIA_ROOT, OUTPUT_FOLDER_NAME)
+OUTPUT_URL = os.path.join(settings.MEDIA_URL, OUTPUT_FOLDER_NAME).replace('\\', '/')  # Ensure forward slashes
 
+LOG_FILE_NAME = 'excel_generation.log'
+STATUS_FILE_NAME = 'excel_generation_status.json'
+EXCEL_FILE_NAME = 'generated_products.xlsx'
 
-# Sample test images (publicly available)
-SAMPLE_IMAGES = {
-    "tshirt": "https://images.unsplash.com/photo-1521572163474-6864f9cf17ab",
-    "dress": "https://images.unsplash.com/photo-1595777457583-95e059d581b8",
-    "jeans": "https://images.unsplash.com/photo-1542272604-787c3835535d"
-}
+LOG_FILE_PATH = os.path.join(OUTPUT_ROOT, LOG_FILE_NAME)
+STATUS_FILE_PATH = os.path.join(OUTPUT_ROOT, STATUS_FILE_NAME)
+EXCEL_FILE_PATH = os.path.join(OUTPUT_ROOT, EXCEL_FILE_NAME)
 
-# ==================== Updated views.py ====================
-from rest_framework.views import APIView
-from rest_framework.response import Response
-from rest_framework import status
-from .models import Product
-from .services import ProductAttributeService
-from .ocr_service import OCRService
-from .visual_processing_service import VisualProcessingService
+# Ensure the output folder exists
+if not os.path.exists(OUTPUT_ROOT):
+    os.makedirs(OUTPUT_ROOT)
+
+# Configure logging
+logging.basicConfig(
+    filename=LOG_FILE_PATH,
+    level=logging.INFO,
+    format='%(asctime)s - %(levelname)s - %(message)s'
+)
+logger = logging.getLogger(__name__)
 
 
 class ExtractProductAttributesView(APIView):
@@ -131,398 +172,7 @@ class ExtractProductAttributesView(APIView):
 
         return Response(result, status=status.HTTP_200_OK)
 
-
-
-# Replace the BatchExtractProductAttributesView in your views.py with this updated version
-
-# class BatchExtractProductAttributesView(APIView):
-#     """
-#     API endpoint to extract product attributes for multiple products in batch.
-#     Uses item-specific mandatory_attrs with source tracking.
-#     Returns attributes in array format with original_value field.
-#     Includes OCR and Visual Processing results.
-#     """
-
-#     def post(self, request):
-#         serializer = BatchProductRequestSerializer(data=request.data)
-#         if not serializer.is_valid():
-#             return Response({"error": serializer.errors}, status=status.HTTP_400_BAD_REQUEST)
-
-#         validated_data = serializer.validated_data
-        
-#         # Get batch-level settings
-#         product_list = validated_data.get("products", [])
-#         model = validated_data.get("model")
-#         extract_additional = validated_data.get("extract_additional", True)
-#         process_image = validated_data.get("process_image", True)
-#         multiple = validated_data.get("multiple", [])
-#         threshold_abs = validated_data.get("threshold_abs", 0.65)
-#         margin = validated_data.get("margin", 0.15)
-#         use_dynamic_thresholds = validated_data.get("use_dynamic_thresholds", True)
-#         use_adaptive_margin = validated_data.get("use_adaptive_margin", True)
-#         use_semantic_clustering = validated_data.get("use_semantic_clustering", True)
-        
-#         # Extract all item_ids to query the database efficiently
-#         item_ids = [p['item_id'] for p in product_list] 
-        
-#         # Fetch all products in one query
-#         products_queryset = Product.objects.filter(item_id__in=item_ids)
-#         product_map = {product.item_id: product for product in products_queryset}
-#         found_ids = set(product_map.keys())
-        
-#         # Fetch all original attribute values for these products in one query
-#         original_values_qs = ProductAttributeValue.objects.filter(
-#             product__item_id__in=item_ids
-#         ).select_related('product')
-        
-#         # Create a nested dictionary: {item_id: {attribute_name: original_value}}
-#         original_values_map = {}
-#         for attr_val in original_values_qs:
-#             item_id = attr_val.product.item_id
-#             if item_id not in original_values_map:
-#                 original_values_map[item_id] = {}
-#             original_values_map[item_id][attr_val.attribute_name] = attr_val.original_value
-        
-#         results = []
-#         successful = 0
-#         failed = 0
-
-#         for product_entry in product_list:
-#             item_id = product_entry['item_id']
-#             mandatory_attrs = product_entry['mandatory_attrs'] 
-
-#             if item_id not in found_ids:
-#                 failed += 1
-#                 results.append({
-#                     "product_id": item_id,
-#                     "error": "Product not found in database"
-#                 })
-#                 continue
-
-#             product = product_map[item_id]
-            
-#             try: 
-#                 title = product.product_name
-#                 short_desc = product.product_short_description
-#                 long_desc = product.product_long_description
-#                 image_url = product.image_path
-                
-#                 ocr_results = None
-#                 ocr_text = None
-#                 visual_results = None
-
-#                 # Image Processing Logic
-#                 if process_image and image_url:
-#                     # OCR Processing
-#                     ocr_service = OCRService()
-#                     ocr_results = ocr_service.process_image(image_url)
-                    
-#                     if ocr_results and ocr_results.get("detected_text"):
-#                         ocr_attrs = ProductAttributeService.extract_attributes_from_ocr(
-#                             ocr_results, model
-#                         )
-#                         ocr_results["extracted_attributes"] = ocr_attrs
-#                         ocr_text = "\n".join([
-#                             f"{item['text']} (confidence: {item['confidence']:.2f})"
-#                             for item in ocr_results["detected_text"]
-#                         ])
-                    
-#                     # Visual Processing
-#                     visual_service = VisualProcessingService()
-#                     product_type_hint = product.product_type if hasattr(product, 'product_type') else None
-#                     visual_results = visual_service.process_image(image_url, product_type_hint)
-                    
-#                     # Format visual attributes to array format with source tracking
-#                     if visual_results and visual_results.get('visual_attributes'):
-#                         visual_results['visual_attributes'] = ProductAttributeService.format_visual_attributes(
-#                             visual_results['visual_attributes']
-#                         )
-
-#                 # Combine product text with source tracking
-#                 product_text, source_map = ProductAttributeService.combine_product_text(
-#                     title=title,
-#                     short_desc=short_desc,
-#                     long_desc=long_desc,
-#                     ocr_text=ocr_text
-#                 )
-
-#                 # Attribute Extraction with source tracking (returns array format)
-#                 extracted = ProductAttributeService.extract_attributes(
-#                     product_text=product_text,
-#                     mandatory_attrs=mandatory_attrs,
-#                     source_map=source_map,
-#                     model=model,
-#                     extract_additional=extract_additional,
-#                     multiple=multiple,
-#                     threshold_abs=threshold_abs,
-#                     margin=margin,
-#                     use_dynamic_thresholds=use_dynamic_thresholds,
-#                     use_adaptive_margin=use_adaptive_margin,
-#                     use_semantic_clustering=use_semantic_clustering
-#                 )
-
-#                 # Add original_value to each extracted attribute
-#                 original_attrs = original_values_map.get(item_id, {})
-                
-#                 # Process mandatory attributes
-#                 for attr_name, attr_values in extracted.get("mandatory", {}).items():
-#                     if isinstance(attr_values, list):
-#                         for attr_obj in attr_values:
-#                             if isinstance(attr_obj, dict):
-#                                 # Add original_value if it exists
-#                                 attr_obj["original_value"] = original_attrs.get(attr_name, "")
-                
-#                 # Process additional attributes
-#                 for attr_name, attr_values in extracted.get("additional", {}).items():
-#                     if isinstance(attr_values, list):
-#                         for attr_obj in attr_values:
-#                             if isinstance(attr_obj, dict):
-#                                 # Add original_value if it exists
-#                                 attr_obj["original_value"] = original_attrs.get(attr_name, "")
-
-#                 result = {
-#                     "product_id": product.item_id,
-#                     "mandatory": extracted.get("mandatory", {}),
-#                     "additional": extracted.get("additional", {}),
-#                 }
-
-#                 # Attach OCR results if available
-#                 if ocr_results:
-#                     result["ocr_results"] = ocr_results
-                
-#                 # Attach Visual Processing results if available
-#                 if visual_results:
-#                     result["visual_results"] = visual_results
-
-#                 results.append(result)
-#                 successful += 1
-
-#             except Exception as e:
-#                 failed += 1
-#                 results.append({
-#                     "product_id": item_id,
-#                     "error": str(e)
-#                 })
-
-#         batch_result = {
-#             "results": results,
-#             "total_products": len(product_list),
-#             "successful": successful,
-#             "failed": failed
-#         }
-
-#         response_serializer = BatchProductResponseSerializer(data=batch_result)
-#         if response_serializer.is_valid():
-#             return Response(response_serializer.data, status=status.HTTP_200_OK)
-
-#         return Response(batch_result, status=status.HTTP_200_OK)
-
-
-# views.py - OPTIMIZED WITHOUT REDIS/CELERY
-
-# class BatchExtractProductAttributesView(APIView):
-#     """
-#     Optimized batch extraction using ThreadPoolExecutor (built-in Python)
-#     """
-
-#     def post(self, request):
-#         serializer = BatchProductRequestSerializer(data=request.data)
-#         if not serializer.is_valid():
-#             return Response({"error": serializer.errors}, status=status.HTTP_400_BAD_REQUEST)
-
-#         validated_data = serializer.validated_data
-#         product_list = validated_data.get("products", [])
-        
-#         # OPTIMIZATION 1: Single optimized database query
-#         item_ids = [p['item_id'] for p in product_list]
-#         products_queryset = Product.objects.filter(
-#             item_id__in=item_ids
-#         ).prefetch_related('attribute_values')  # Single query!
-        
-#         product_map = {product.item_id: product for product in products_queryset}
-        
-#         # OPTIMIZATION 2: Prefetch ALL original attribute values in ONE query
-#         original_values_qs = ProductAttributeValue.objects.filter(
-#             product__item_id__in=item_ids
-#         ).select_related('product')
-        
-#         original_values_map = {}
-#         for attr_val in original_values_qs:
-#             item_id = attr_val.product.item_id
-#             if item_id not in original_values_map:
-#                 original_values_map[item_id] = {}
-#             original_values_map[item_id][attr_val.attribute_name] = attr_val.original_value
-        
-#         # Extract settings
-#         model = validated_data.get("model")
-#         extract_additional = validated_data.get("extract_additional", True)
-#         process_image = validated_data.get("process_image", True)
-#         multiple = validated_data.get("multiple", [])
-#         threshold_abs = validated_data.get("threshold_abs", 0.65)
-#         margin = validated_data.get("margin", 0.15)
-#         use_dynamic_thresholds = validated_data.get("use_dynamic_thresholds", True)
-#         use_adaptive_margin = validated_data.get("use_adaptive_margin", True)
-#         use_semantic_clustering = validated_data.get("use_semantic_clustering", True)
-        
-#         results = []
-#         successful = 0
-#         failed = 0
-        
-#         # OPTIMIZATION 3: Initialize services once
-#         ocr_service = OCRService() if process_image else None
-#         visual_service = VisualProcessingService() if process_image else None
-
-#         # OPTIMIZATION 4: Process in parallel using ThreadPoolExecutor
-#         def process_single_product(product_entry):
-#             """Process a single product (runs in parallel)"""
-#             item_id = product_entry['item_id']
-#             mandatory_attrs = product_entry['mandatory_attrs']
-
-#             if item_id not in product_map:
-#                 return {
-#                     "product_id": item_id,
-#                     "error": "Product not found in database"
-#                 }, False
-
-#             product = product_map[item_id]
-            
-#             try:
-#                 title = product.product_name
-#                 short_desc = product.product_short_description
-#                 long_desc = product.product_long_description
-#                 image_url = product.image_path
-                
-#                 ocr_results = None
-#                 ocr_text = None
-#                 visual_results = None
-
-#                 # Image processing (if enabled)
-#                 if process_image and image_url:
-#                     if ocr_service:
-#                         ocr_results = ocr_service.process_image(image_url)
-                        
-#                         if ocr_results and ocr_results.get("detected_text"):
-#                             ocr_attrs = ProductAttributeService.extract_attributes_from_ocr(
-#                                 ocr_results, model
-#                             )
-#                             ocr_results["extracted_attributes"] = ocr_attrs
-#                             ocr_text = "\n".join([
-#                                 f"{item['text']} (confidence: {item['confidence']:.2f})"
-#                                 for item in ocr_results["detected_text"]
-#                             ])
-                    
-#                     if visual_service:
-#                         product_type_hint = product.product_type if hasattr(product, 'product_type') else None
-#                         visual_results = visual_service.process_image(image_url, product_type_hint)
-                        
-#                         if visual_results and visual_results.get('visual_attributes'):
-#                             visual_results['visual_attributes'] = ProductAttributeService.format_visual_attributes(
-#                                 visual_results['visual_attributes']
-#                             )
-
-#                 # Combine product text with source tracking
-#                 product_text, source_map = ProductAttributeService.combine_product_text(
-#                     title=title,
-#                     short_desc=short_desc,
-#                     long_desc=long_desc,
-#                     ocr_text=ocr_text
-#                 )
-
-#                 # Extract attributes (WITH CACHING ENABLED)
-#                 extracted = ProductAttributeService.extract_attributes(
-#                     product_text=product_text,
-#                     mandatory_attrs=mandatory_attrs,
-#                     source_map=source_map,
-#                     model=model,
-#                     extract_additional=extract_additional,
-#                     multiple=multiple,
-#                     threshold_abs=threshold_abs,
-#                     margin=margin,
-#                     use_dynamic_thresholds=use_dynamic_thresholds,
-#                     use_adaptive_margin=use_adaptive_margin,
-#                     use_semantic_clustering=use_semantic_clustering,
-#                     use_cache=True  # Enable caching!
-#                 )
-
-#                 # Add original values
-#                 original_attrs = original_values_map.get(item_id, {})
-                
-#                 for attr_name, attr_values in extracted.get("mandatory", {}).items():
-#                     if isinstance(attr_values, list):
-#                         for attr_obj in attr_values:
-#                             if isinstance(attr_obj, dict):
-#                                 attr_obj["original_value"] = original_attrs.get(attr_name, "")
-                
-#                 for attr_name, attr_values in extracted.get("additional", {}).items():
-#                     if isinstance(attr_values, list):
-#                         for attr_obj in attr_values:
-#                             if isinstance(attr_obj, dict):
-#                                 attr_obj["original_value"] = original_attrs.get(attr_name, "")
-
-#                 result = {
-#                     "product_id": product.item_id,
-#                     "mandatory": extracted.get("mandatory", {}),
-#                     "additional": extracted.get("additional", {}),
-#                 }
-
-#                 if ocr_results:
-#                     result["ocr_results"] = ocr_results
-                
-#                 if visual_results:
-#                     result["visual_results"] = visual_results
-
-#                 return result, True
-
-#             except Exception as e:
-#                 return {
-#                     "product_id": item_id,
-#                     "error": str(e)
-#                 }, False
-
-#         # OPTIMIZATION 5: Use ThreadPoolExecutor for parallel processing
-#         import concurrent.futures
-#         max_workers = min(10, len(product_list))  # Up to 10 parallel workers
-        
-#         with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
-#             # Submit all tasks
-#             future_to_product = {
-#                 executor.submit(process_single_product, product): product
-#                 for product in product_list
-#             }
-            
-#             # Collect results as they complete
-#             for future in concurrent.futures.as_completed(future_to_product):
-#                 try:
-#                     result, success = future.result()
-#                     results.append(result)
-#                     if success:
-#                         successful += 1
-#                     else:
-#                         failed += 1
-#                 except Exception as e:
-#                     failed += 1
-#                     logger.error(f"Unexpected error: {str(e)}")
-#                     results.append({
-#                         "product_id": "unknown",
-#                         "error": str(e)
-#                     })
-
-#         batch_result = {
-#             "results": results,
-#             "total_products": len(product_list),
-#             "successful": successful,
-#             "failed": failed
-#         }
-
-#         response_serializer = BatchProductResponseSerializer(data=batch_result)
-#         if response_serializer.is_valid():
-#             return Response(response_serializer.data, status=status.HTTP_200_OK)
-
-#         return Response(batch_result, status=status.HTTP_200_OK)
-
-
 # ==================== OPTIMIZED BATCH VIEW ====================
-import concurrent.futures
 
 class BatchExtractProductAttributesView(APIView):
     """
@@ -773,7 +423,6 @@ class BatchExtractProductAttributesView(APIView):
         return Response(batch_result, status=status.HTTP_200_OK)
 
 
-
 class ProductListView(APIView):
     """
     GET API to list all products with details
@@ -783,80 +432,6 @@ class ProductListView(APIView):
         serializer = ProductSerializer(products, many=True)
         return Response(serializer.data, status=status.HTTP_200_OK)
 
-
-from rest_framework.views import APIView
-from rest_framework.response import Response
-from rest_framework import status
-from rest_framework.parsers import MultiPartParser, FormParser
-import pandas as pd
-from .models import Product
-
-
-from rest_framework.views import APIView
-from rest_framework.response import Response
-from rest_framework import status
-from rest_framework.parsers import MultiPartParser, FormParser
-from django.db import transaction
-import pandas as pd
-from .models import Product, ProductAttributeValue
-
-
-
-
-
-
-
-
-
-
-
-import logging
-import json
-from rest_framework.views import APIView
-from rest_framework.response import Response
-from rest_framework import status
-from rest_framework.parsers import MultiPartParser, FormParser
-from django.db import transaction
-from django.db.models import Prefetch
-import pandas as pd
-# Import ALL your models
-from .models import Product, ProductAttributeValue, ProductType, ProductAttribute, AttributePossibleValue
-from .services import ProductAttributeService
-from .ocr_service import OCRService
-from .visual_processing_service import VisualProcessingService
-from openpyxl import Workbook
-from openpyxl.styles import Font, PatternFill, Alignment
-from django.conf import settings
-import os
-import threading
-from datetime import datetime
-
-# --- Configuration for Generated Outputs Folder ---
-OUTPUT_FOLDER_NAME = 'generated_outputs'
-OUTPUT_ROOT = os.path.join(settings.MEDIA_ROOT, OUTPUT_FOLDER_NAME)
-OUTPUT_URL = os.path.join(settings.MEDIA_URL, OUTPUT_FOLDER_NAME).replace('\\', '/') # Use forward slashes for URL
-
-# Define log and status file paths within the new subfolder
-LOG_FILE_NAME = 'excel_generation.log'
-STATUS_FILE_NAME = 'excel_generation_status.json'
-EXCEL_FILE_NAME = 'generated_products.xlsx'
-
-LOG_FILE_PATH = os.path.join(OUTPUT_ROOT, LOG_FILE_NAME)
-STATUS_FILE_PATH = os.path.join(OUTPUT_ROOT, STATUS_FILE_NAME)
-EXCEL_FILE_PATH = os.path.join(OUTPUT_ROOT, EXCEL_FILE_NAME)
-
-# Ensure the OUTPUT_ROOT exists for files to be saved
-if not os.path.exists(OUTPUT_ROOT):
-    os.makedirs(OUTPUT_ROOT)
-
-# Configure basic logging to the new path
-logging.basicConfig(
-    filename=LOG_FILE_PATH,
-    level=logging.INFO,
-    format='%(asctime)s - %(levelname)s - %(message)s'
-)
-logger = logging.getLogger(__name__)
-
 # -------------------------------------------------------------------------------------------------
 
 def generate_product_excel_background():
@@ -1199,7 +774,6 @@ def generate_product_excel_background():
         logger.exception("CRITICAL ERROR during background Excel generation process.")
         write_status("FAILED", error_msg=str(e))
 
-
 # -------------------------------------------------------------------------------------------------
 
 class ProductUploadExcelView(APIView):
@@ -1381,15 +955,6 @@ class ProductUploadExcelView(APIView):
             return Response({'error': f'An unexpected error occurred while processing the file: {str(e)}'}, status=status.HTTP_500_INTERNAL_SERVER_ERROR)
 
 
-# Add this view to your views.py for downloading a template
-
-from django.http import HttpResponse
-from openpyxl import Workbook
-from openpyxl.styles import Font, PatternFill, Alignment
-from rest_framework.views import APIView
-import io
-
-
 class DownloadExcelTemplateView(APIView):
     """
     GET API to download an Excel template with two sheets:
@@ -1921,24 +1486,6 @@ class ProductTypeListView(APIView):
         return Response({"product_types": list(product_types)}, status=status.HTTP_200_OK)
     
 
-
-# Add these views to your views.py
-
-from rest_framework.views import APIView
-from rest_framework.response import Response
-from rest_framework import status
-from rest_framework.parsers import MultiPartParser, FormParser
-from django.db import transaction
-import pandas as pd
-from .models import Product, ProductAttributeValue
-from .serializers import (
-    ProductAttributeValueSerializer,
-    ProductAttributeValueInputSerializer,
-    BulkProductAttributeValueSerializer,
-    ProductWithAttributesSerializer
-)
-
-
 class ProductAttributeValueView(APIView):
     """
     API to manage manually entered original attribute values.
@@ -2176,4 +1723,153 @@ class ProductListWithAttributesView(APIView):
         else:
             products = Product.objects.all()
             serializer = ProductWithAttributesSerializer(products, many=True)
-            return Response(serializer.data, status=status.HTTP_200_OK)
+            return Response(serializer.data, status=status.HTTP_200_OK)
+        
+
+
+
+class CacheManagementView(APIView):
+    """
+    API endpoint to manage caching system.
+    
+    GET: Get current cache statistics and configuration
+    POST: Enable/disable caching or clear caches
+    """
+    
+    def get(self, request):
+        """
+        Get current cache configuration and statistics.
+        """
+        config = cache_config.get_cache_config()
+        stats = ProductAttributeService.get_cache_stats()
+        
+        return Response({
+            "configuration": config,
+            "statistics": stats,
+            "message": "Cache status retrieved successfully"
+        }, status=status.HTTP_200_OK)
+    
+    def post(self, request):
+        """
+        Manage cache settings.
+        
+        Expected payload examples:
+        
+        1. Enable/disable caching:
+        {
+            "action": "toggle",
+            "enable": true  // or false
+        }
+        
+        2. Clear all caches:
+        {
+            "action": "clear"
+        }
+        
+        3. Clear specific cache:
+        {
+            "action": "clear",
+            "cache_type": "embedding"  // or "attribute" or "clip"
+        }
+        
+        4. Get statistics:
+        {
+            "action": "stats"
+        }
+        """
+        action = request.data.get('action')
+        
+        if not action:
+            return Response({
+                "error": "action is required",
+                "valid_actions": ["toggle", "clear", "stats"]
+            }, status=status.HTTP_400_BAD_REQUEST)
+        
+        # Toggle caching on/off
+        if action == "toggle":
+            enable = request.data.get('enable')
+            
+            if enable is None:
+                return Response({
+                    "error": "enable parameter is required (true/false)"
+                }, status=status.HTTP_400_BAD_REQUEST)
+            
+            # Update the cache configuration
+            cache_config.ENABLE_CACHING = bool(enable)
+            cache_config.ENABLE_ATTRIBUTE_EXTRACTION_CACHE = bool(enable)
+            cache_config.ENABLE_EMBEDDING_CACHE = bool(enable)
+            cache_config.ENABLE_CLIP_MODEL_CACHE = bool(enable)
+            
+            status_msg = "enabled" if enable else "disabled"
+            
+            return Response({
+                "message": f"Caching has been {status_msg}",
+                "configuration": cache_config.get_cache_config()
+            }, status=status.HTTP_200_OK)
+        
+        # Clear caches
+        elif action == "clear":
+            cache_type = request.data.get('cache_type', 'all')
+            
+            if cache_type == 'all':
+                ProductAttributeService.clear_all_caches()
+                VisualProcessingService.clear_clip_cache()
+                message = "All caches cleared successfully"
+            
+            elif cache_type == 'embedding':
+                from .services import EmbeddingCache
+                EmbeddingCache.clear()
+                message = "Embedding cache cleared successfully"
+            
+            elif cache_type == 'attribute':
+                from .services import SimpleCache
+                SimpleCache.clear()
+                message = "Attribute extraction cache cleared successfully"
+            
+            elif cache_type == 'clip':
+                VisualProcessingService.clear_clip_cache()
+                message = "CLIP model cache cleared successfully"
+            
+            else:
+                return Response({
+                    "error": f"Invalid cache_type: {cache_type}",
+                    "valid_types": ["all", "embedding", "attribute", "clip"]
+                }, status=status.HTTP_400_BAD_REQUEST)
+            
+            return Response({
+                "message": message,
+                "statistics": ProductAttributeService.get_cache_stats()
+            }, status=status.HTTP_200_OK)
+        
+        # Get statistics
+        elif action == "stats":
+            stats = ProductAttributeService.get_cache_stats()
+            config = cache_config.get_cache_config()
+            
+            return Response({
+                "configuration": config,
+                "statistics": stats
+            }, status=status.HTTP_200_OK)
+        
+        else:
+            return Response({
+                "error": f"Invalid action: {action}",
+                "valid_actions": ["toggle", "clear", "stats"]
+            }, status=status.HTTP_400_BAD_REQUEST)
+
+
+class CacheStatsView(APIView):
+    """
+    Simple GET endpoint to retrieve cache statistics.
+    """
+    
+    def get(self, request):
+        """Get current cache statistics."""
+        stats = ProductAttributeService.get_cache_stats()
+        config = cache_config.get_cache_config()
+        
+        return Response({
+            "cache_enabled": config["master_cache_enabled"],
+            "statistics": stats,
+            "timestamp": datetime.now().isoformat()
+        }, status=status.HTTP_200_OK)
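
For reference, a hedged client sketch for exercising the two views above. The mount paths are assumptions: this commit's urls.py hunk only registers the attribute-value routes, so the cache endpoints' URLs are not visible here.

# ==================== hypothetical client sketch ====================
import requests

BASE = "http://localhost:8000/attr_extraction"  # assumed host and app prefix
CACHE_URL = BASE + "/cache/"         # assumed route for CacheManagementView
STATS_URL = BASE + "/cache/stats/"   # assumed route for CacheStatsView

# Disable caching at runtime
resp = requests.post(CACHE_URL, json={"action": "toggle", "enable": False})
print(resp.json()["configuration"]["master_cache_enabled"])  # -> False

# Clear only the embedding cache
requests.post(CACHE_URL, json={"action": "clear", "cache_type": "embedding"})

# Read statistics from the lightweight endpoint
print(requests.get(STATS_URL).json()["cache_enabled"])

Note that the toggle only mutates cache_config attributes in the running process; modules that imported a flag by value at startup (such as visual_processing_service.py below) will not observe the change.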

+ 45 - 1657
attr_extraction/visual_processing_service.py

@@ -1,373 +1,4 @@
-
-# # ==================== visual_processing_service.py (FIXED - Dynamic Detection) ====================
-# import torch
-# import cv2
-# import numpy as np
-# import requests
-# from io import BytesIO
-# from PIL import Image
-# from typing import Dict, List, Optional, Tuple
-# import logging
-# from transformers import CLIPProcessor, CLIPModel
-# from sklearn.cluster import KMeans
-
-# logger = logging.getLogger(__name__)
-
-
-# class VisualProcessingService:
-#     """Service for extracting visual attributes from product images using CLIP."""
-    
-#     # Class-level caching (shared across instances)
-#     _clip_model = None
-#     _clip_processor = None
-#     _device = None
-    
-#     # Define category-specific attributes
-#     CATEGORY_ATTRIBUTES = {
-#         "clothing": {
-#             "products": ["t-shirt", "shirt", "dress", "pants", "jeans", "shorts", 
-#                         "skirt", "jacket", "coat", "sweater", "hoodie", "top", "blouse"],
-#             "attributes": {
-#                 "pattern": ["solid color", "striped", "checkered", "graphic print", "floral", "geometric", "plain"],
-#                 "material": ["cotton", "polyester", "denim", "leather", "silk", "wool", "linen", "blend"],
-#                 "style": ["casual", "formal", "sporty", "streetwear", "elegant", "vintage", "bohemian"],
-#                 "fit": ["slim fit", "regular fit", "loose fit", "oversized", "tailored"],
-#                 "neckline": ["crew neck", "v-neck", "round neck", "collar", "scoop neck"],
-#                 "sleeve_type": ["short sleeve", "long sleeve", "sleeveless", "3/4 sleeve"],
-#                 "closure_type": ["button", "zipper", "pull-on", "snap", "tie"]
-#             }
-#         },
-#         "tools": {
-#             "products": ["screwdriver", "hammer", "wrench", "pliers", "drill", "saw", 
-#                         "measuring tape", "level", "chisel", "file"],
-#             "attributes": {
-#                 "material": ["steel", "aluminum", "plastic", "wood", "rubber", "chrome"],
-#                 "type": ["manual", "electric", "pneumatic", "cordless", "corded"],
-#                 "finish": ["chrome plated", "powder coated", "stainless steel", "painted"],
-#                 "handle_type": ["rubber grip", "plastic", "wooden", "cushioned", "ergonomic"]
-#             }
-#         },
-#         "electronics": {
-#             "products": ["phone", "laptop", "tablet", "headphones", "speaker", "camera", 
-#                         "smartwatch", "charger", "mouse", "keyboard"],
-#             "attributes": {
-#                 "material": ["plastic", "metal", "glass", "aluminum", "rubber"],
-#                 "style": ["modern", "minimalist", "sleek", "industrial", "vintage"],
-#                 "finish": ["matte", "glossy", "metallic", "textured"],
-#                 "connectivity": ["wireless", "wired", "bluetooth", "USB"]
-#             }
-#         },
-#         "furniture": {
-#             "products": ["chair", "table", "sofa", "bed", "desk", "shelf", "cabinet", 
-#                         "dresser", "bench", "stool"],
-#             "attributes": {
-#                 "material": ["wood", "metal", "glass", "plastic", "fabric", "leather"],
-#                 "style": ["modern", "traditional", "industrial", "rustic", "contemporary", "vintage"],
-#                 "finish": ["natural wood", "painted", "stained", "laminated", "upholstered"]
-#             }
-#         },
-#         "home_decor": {
-#             "products": ["painting", "canvas", "wall art", "frame", "vase", "lamp", 
-#                         "mirror", "clock", "sculpture", "poster"],
-#             "attributes": {
-#                 "style": ["modern", "abstract", "traditional", "contemporary", "vintage", "minimalist"],
-#                 "material": ["canvas", "wood", "metal", "glass", "ceramic", "paper"],
-#                 "finish": ["glossy", "matte", "textured", "framed", "gallery wrapped"],
-#                 "theme": ["nature", "geometric", "floral", "landscape", "portrait", "abstract"]
-#             }
-#         },
-#         "kitchen": {
-#             "products": ["pot", "pan", "knife", "utensil", "plate", "bowl", "cup", 
-#                         "appliance", "cutting board", "container"],
-#             "attributes": {
-#                 "material": ["stainless steel", "aluminum", "ceramic", "glass", "plastic", "wood"],
-#                 "finish": ["non-stick", "stainless", "enameled", "anodized"],
-#                 "type": ["manual", "electric", "dishwasher safe"]
-#             }
-#         }
-#     }
-    
-#     def __init__(self):
-#         pass
-    
-#     @classmethod
-#     def _get_device(cls):
-#         """Get optimal device."""
-#         if cls._device is None:
-#             cls._device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-#             logger.info(f"Visual Processing using device: {cls._device}")
-#         return cls._device
-    
-#     @classmethod
-#     def _get_clip_model(cls):
-#         """Lazy load CLIP model with class-level caching."""
-#         if cls._clip_model is None:
-#             logger.info("Loading CLIP model (this may take a few minutes on first use)...")
-#             cls._clip_model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32")
-#             cls._clip_processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")
-            
-#             device = cls._get_device()
-#             cls._clip_model.to(device)
-#             cls._clip_model.eval()
-            
-#             logger.info("✓ CLIP model loaded successfully")
-#         return cls._clip_model, cls._clip_processor
-    
-#     def download_image(self, image_url: str) -> Optional[Image.Image]:
-#         """Download image from URL."""
-#         try:
-#             response = requests.get(image_url, timeout=10)
-#             response.raise_for_status()
-#             image = Image.open(BytesIO(response.content)).convert('RGB')
-#             return image
-#         except Exception as e:
-#             logger.error(f"Error downloading image from {image_url}: {str(e)}")
-#             return None
-    
-#     def extract_dominant_colors(self, image: Image.Image, n_colors: int = 3) -> List[Dict]:
-#         """Extract dominant colors using K-means."""
-#         try:
-#             # Resize for faster processing
-#             img_small = image.resize((150, 150))
-#             img_array = np.array(img_small)
-#             pixels = img_array.reshape(-1, 3)
-            
-#             # K-means clustering
-#             kmeans = KMeans(n_clusters=n_colors, random_state=42, n_init=5)
-#             kmeans.fit(pixels)
-            
-#             colors = []
-#             labels_counts = np.bincount(kmeans.labels_)
-            
-#             for i, center in enumerate(kmeans.cluster_centers_):
-#                 rgb = tuple(center.astype(int))
-#                 color_name = self._get_color_name_simple(rgb)
-#                 percentage = float(labels_counts[i] / len(kmeans.labels_) * 100)
-                
-#                 colors.append({
-#                     "name": color_name,
-#                     "rgb": rgb,
-#                     "percentage": percentage
-#                 })
-            
-#             colors.sort(key=lambda x: x['percentage'], reverse=True)
-#             return colors
-            
-#         except Exception as e:
-#             logger.error(f"Error extracting colors: {str(e)}")
-#             return []
-    
-#     def _get_color_name_simple(self, rgb: Tuple[int, int, int]) -> str:
-#         """
-#         Simple color name detection without webcolors dependency.
-#         Maps RGB to basic color names.
-#         """
-#         r, g, b = rgb
-        
-#         # Define basic color ranges
-#         colors = {
-#             'black': (r < 50 and g < 50 and b < 50),
-#             'white': (r > 200 and g > 200 and b > 200),
-#             'gray': (abs(r - g) < 30 and abs(g - b) < 30 and abs(r - b) < 30 and 50 <= r <= 200),
-#             'red': (r > 150 and g < 100 and b < 100),
-#             'green': (g > 150 and r < 100 and b < 100),
-#             'blue': (b > 150 and r < 100 and g < 100),
-#             'yellow': (r > 200 and g > 200 and b < 100),
-#             'orange': (r > 200 and 100 < g < 200 and b < 100),
-#             'purple': (r > 100 and b > 100 and g < 100),
-#             'pink': (r > 200 and 100 < g < 200 and 100 < b < 200),
-#             'brown': (50 < r < 150 and 30 < g < 100 and b < 80),
-#             'cyan': (r < 100 and g > 150 and b > 150),
-#         }
-        
-#         for color_name, condition in colors.items():
-#             if condition:
-#                 return color_name
-        
-#         # Default fallback
-#         if r > g and r > b:
-#             return 'red'
-#         elif g > r and g > b:
-#             return 'green'
-#         elif b > r and b > g:
-#             return 'blue'
-#         else:
-#             return 'gray'
-    
-#     def classify_with_clip(
-#         self,
-#         image: Image.Image,
-#         candidates: List[str],
-#         attribute_name: str,
-#         confidence_threshold: float = 0.15
-#     ) -> Dict:
-#         """Use CLIP to classify image against candidate labels."""
-#         try:
-#             model, processor = self._get_clip_model()
-#             device = self._get_device()
-            
-#             # Prepare inputs
-#             inputs = processor(
-#                 text=candidates,
-#                 images=image,
-#                 return_tensors="pt",
-#                 padding=True
-#             )
-            
-#             # Move to device
-#             inputs = {k: v.to(device) for k, v in inputs.items()}
-            
-#             # Get predictions
-#             with torch.no_grad():
-#                 outputs = model(**inputs)
-#                 logits_per_image = outputs.logits_per_image
-#                 probs = logits_per_image.softmax(dim=1).cpu()
-            
-#             # Get top predictions
-#             top_k = min(3, len(candidates))
-#             top_probs, top_indices = torch.topk(probs[0], k=top_k)
-            
-#             results = []
-#             for prob, idx in zip(top_probs, top_indices):
-#                 if prob.item() > confidence_threshold:
-#                     results.append({
-#                         "value": candidates[idx.item()],
-#                         "confidence": float(prob.item())
-#                     })
-            
-#             return {
-#                 "attribute": attribute_name,
-#                 "predictions": results
-#             }
-            
-#         except Exception as e:
-#             logger.error(f"Error in CLIP classification for {attribute_name}: {str(e)}")
-#             return {"attribute": attribute_name, "predictions": []}
-    
-#     def detect_product_category(self, image: Image.Image) -> Tuple[str, float]:
-#         """
-#         First detect which category the product belongs to.
-#         Returns: (category_name, confidence)
-#         """
-#         # Get all product types from all categories
-#         all_categories = []
-#         category_map = {}
-        
-#         for category, data in self.CATEGORY_ATTRIBUTES.items():
-#             for product in data["products"]:
-#                 all_categories.append(f"a photo of a {product}")
-#                 category_map[f"a photo of a {product}"] = category
-        
-#         # Classify
-#         result = self.classify_with_clip(image, all_categories, "category_detection", confidence_threshold=0.10)
-        
-#         if result["predictions"]:
-#             best_match = result["predictions"][0]
-#             detected_category = category_map[best_match["value"]]
-#             product_type = best_match["value"].replace("a photo of a ", "")
-#             confidence = best_match["confidence"]
-            
-#             logger.info(f"Detected category: {detected_category}, product: {product_type}, confidence: {confidence:.3f}")
-#             return detected_category, product_type, confidence
-        
-#         return "unknown", "unknown", 0.0
-    
-#     def process_image(
-#         self,
-#         image_url: str,
-#         product_type_hint: Optional[str] = None
-#     ) -> Dict:
-#         """
-#         Main method to process image and extract visual attributes.
-#         Now dynamically detects product category first.
-#         """
-#         import time
-#         start_time = time.time()
-        
-#         try:
-#             # Download image
-#             image = self.download_image(image_url)
-#             if image is None:
-#                 return {
-#                     "visual_attributes": {},
-#                     "error": "Failed to download image"
-#                 }
-            
-#             visual_attributes = {}
-#             detailed_predictions = {}
-            
-#             # Step 1: Detect product category
-#             detected_category, detected_product_type, category_confidence = self.detect_product_category(image)
-            
-#             # If confidence is too low, return minimal info
-#             if category_confidence < 0.10:
-#                 logger.warning(f"Low confidence in category detection ({category_confidence:.3f}). Returning basic attributes only.")
-#                 colors = self.extract_dominant_colors(image, n_colors=3)
-#                 if colors:
-#                     visual_attributes["primary_color"] = colors[0]["name"]
-#                     visual_attributes["color_palette"] = [c["name"] for c in colors]
-                
-#                 return {
-#                     "visual_attributes": visual_attributes,
-#                     "category_confidence": category_confidence,
-#                     "processing_time": round(time.time() - start_time, 2)
-#                 }
-            
-#             # Add detected product type
-#             visual_attributes["product_type"] = detected_product_type
-#             visual_attributes["category"] = detected_category
-            
-#             # Step 2: Extract color (universal attribute)
-#             colors = self.extract_dominant_colors(image, n_colors=3)
-#             if colors:
-#                 visual_attributes["primary_color"] = colors[0]["name"]
-#                 visual_attributes["color_palette"] = [c["name"] for c in colors]
-            
-#             # Step 3: Extract category-specific attributes
-#             if detected_category in self.CATEGORY_ATTRIBUTES:
-#                 category_config = self.CATEGORY_ATTRIBUTES[detected_category]
-                
-#                 for attr_name, attr_values in category_config["attributes"].items():
-#                     # Use higher confidence threshold for category-specific attributes
-#                     result = self.classify_with_clip(image, attr_values, attr_name, confidence_threshold=0.20)
-                    
-#                     if result["predictions"]:
-#                         # Only add if confidence is reasonable
-#                         best_prediction = result["predictions"][0]
-#                         if best_prediction["confidence"] > 0.20:
-#                             visual_attributes[attr_name] = best_prediction["value"]
-#                         detailed_predictions[attr_name] = result
-            
-#             processing_time = time.time() - start_time
-            
-#             return {
-#                 "visual_attributes": visual_attributes,
-#                 "detailed_predictions": detailed_predictions,
-#                 "category_confidence": category_confidence,
-#                 "processing_time": round(processing_time, 2)
-#             }
-            
-#         except Exception as e:
-#             logger.error(f"Error processing image: {str(e)}")
-#             return {
-#                 "visual_attributes": {},
-#                 "error": str(e),
-#                 "processing_time": round(time.time() - start_time, 2)
-#             }
-# ==================== visual_processing_service.py (FIXED - Smart Subcategory Detection) ====================
+# ==================== visual_processing_service.py (WITH CACHE CONTROL) ====================
 import torch
 import numpy as np
 import requests
@@ -378,18 +9,21 @@ import logging
 from transformers import CLIPProcessor, CLIPModel
 from sklearn.cluster import KMeans
 
+# ⚡ IMPORT CACHE CONFIGURATION
+from .cache_config import ENABLE_CLIP_MODEL_CACHE
+
 logger = logging.getLogger(__name__)
 
 import os
-os.environ['TOKENIZERS_PARALLELISM'] = 'false'  # Disable tokenizer warnings
+os.environ['TOKENIZERS_PARALLELISM'] = 'false'
 import warnings
-warnings.filterwarnings('ignore')  # Suppress all warnings
+warnings.filterwarnings('ignore')
 
 
 class VisualProcessingService:
     """Service for extracting visual attributes from product images using CLIP with smart subcategory detection."""
     
-    # Class-level caching (shared across instances)
+    # ⚡ Class-level caching (controlled by cache_config)
     _clip_model = None
     _clip_processor = None
     _device = None
@@ -497,7 +131,24 @@ class VisualProcessingService:
     
     @classmethod
     def _get_clip_model(cls):
-        """Lazy load CLIP model with class-level caching."""
+        """
+        Lazy load CLIP model with optional class-level caching.
+        ⚡ If caching is disabled, model is still loaded but not persisted at class level.
+        """
+        # ⚡ CACHE CONTROL: If caching is disabled, always reload (no persistence)
+        if not ENABLE_CLIP_MODEL_CACHE:
+            logger.info("⚠ CLIP model caching is DISABLED - loading fresh instance")
+            model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32")
+            processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")
+            
+            device = cls._get_device()
+            model.to(device)
+            model.eval()
+            
+            logger.info("✓ CLIP model loaded (no caching)")
+            return model, processor
+        
+        # Caching is enabled - use class-level cache
         if cls._clip_model is None:
             logger.info("Loading CLIP model (this may take a few minutes on first use)...")
             cls._clip_model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32")
@@ -507,9 +158,24 @@ class VisualProcessingService:
             cls._clip_model.to(device)
             cls._clip_model.eval()
             
-            logger.info("✓ CLIP model loaded successfully")
+            logger.info("✓ CLIP model loaded and cached successfully")
+        else:
+            logger.info("✓ Using cached CLIP model")
+            
         return cls._clip_model, cls._clip_processor
     
+    @classmethod
+    def clear_clip_cache(cls):
+        """Clear the cached CLIP model to free memory."""
+        if cls._clip_model is not None:
+            del cls._clip_model
+            del cls._clip_processor
+            cls._clip_model = None
+            cls._clip_processor = None
+            if torch.cuda.is_available():
+                torch.cuda.empty_cache()
+            logger.info("✓ CLIP model cache cleared")
+    
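A minimal usage sketch of the cache toggle (hypothetical driver code, not part of this diff; the module path is assumed from the repo layout):

# With ENABLE_CLIP_MODEL_CACHE = True the second call reuses the class-level
# model; with it False, each call reloads the weights from disk.
from attr_extraction.visual_processing_service import VisualProcessingService

model, processor = VisualProcessingService._get_clip_model()    # first call: loads the model
model, processor = VisualProcessingService._get_clip_model()    # reuses the cache only if enabled
VisualProcessingService.clear_clip_cache()                      # frees memory when a cache exists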
     def download_image(self, image_url: str) -> Optional[Image.Image]:
         """Download image from URL."""
         try:
@@ -524,12 +190,10 @@ class VisualProcessingService:
     def extract_dominant_colors(self, image: Image.Image, n_colors: int = 3) -> List[Dict]:
         """Extract dominant colors using K-means clustering."""
         try:
-            # Resize for faster processing
             img_small = image.resize((150, 150))
             img_array = np.array(img_small)
             pixels = img_array.reshape(-1, 3)
             
-            # K-means clustering
             kmeans = KMeans(n_clusters=n_colors, random_state=42, n_init=5)
             kmeans.fit(pixels)
             
@@ -547,7 +211,6 @@ class VisualProcessingService:
                     "percentage": round(percentage, 2)
                 })
             
-            # Sort by percentage (most dominant first)
             colors.sort(key=lambda x: x['percentage'], reverse=True)
             return colors
             
@@ -559,7 +222,6 @@ class VisualProcessingService:
         """Map RGB values to basic color names."""
         r, g, b = rgb
         
-        # Define color ranges with priorities
         colors = {
             'black': (r < 50 and g < 50 and b < 50),
             'white': (r > 200 and g > 200 and b > 200),
@@ -580,7 +242,6 @@ class VisualProcessingService:
             if condition:
                 return color_name
         
-        # Fallback to dominant channel
         if r > g and r > b:
             return 'red'
         elif g > r and g > b:
@@ -589,57 +250,6 @@ class VisualProcessingService:
             return 'blue'
         else:
             return 'gray'
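For intuition, a minimal self-contained sketch of the same K-means color extraction on a synthetic image (toy data; assumes only numpy, Pillow, and scikit-learn):

import numpy as np
from PIL import Image
from sklearn.cluster import KMeans

# Synthetic image: bottom 70% red, top 30% near-black stripe.
img = Image.new("RGB", (100, 100), (220, 30, 30))
img.paste(Image.new("RGB", (100, 30), (20, 20, 20)), (0, 0))

pixels = np.array(img).reshape(-1, 3)
kmeans = KMeans(n_clusters=2, random_state=42, n_init=5).fit(pixels)
shares = np.bincount(kmeans.labels_) / len(kmeans.labels_)
print(kmeans.cluster_centers_.astype(int), shares)  # centers ~red/~black at ~70%/30%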
-    
-    # def classify_with_clip(
-    #     self,
-    #     image: Image.Image,
-    #     candidates: List[str],
-    #     attribute_name: str,
-    #     confidence_threshold: float = 0.15
-    # ) -> Dict:
-    #     """Use CLIP to classify image against candidate labels."""
-    #     try:
-    #         model, processor = self._get_clip_model()
-    #         device = self._get_device()
-            
-    #         # Prepare inputs
-    #         inputs = processor(
-    #             text=candidates,
-    #             images=image,
-    #             return_tensors="pt",
-    #             padding=True
-    #         )
-            
-    #         # Move to device
-    #         inputs = {k: v.to(device) for k, v in inputs.items()}
-            
-    #         # Get predictions
-    #         with torch.no_grad():
-    #             outputs = model(**inputs)
-    #             logits_per_image = outputs.logits_per_image
-    #             probs = logits_per_image.softmax(dim=1).cpu()
-            
-    #         # Get top predictions
-    #         top_k = min(3, len(candidates))
-    #         top_probs, top_indices = torch.topk(probs[0], k=top_k)
-            
-    #         results = []
-    #         for prob, idx in zip(top_probs, top_indices):
-    #             if prob.item() > confidence_threshold:
-    #                 results.append({
-    #                     "value": candidates[idx.item()],
-    #                     "confidence": round(float(prob.item()), 3)
-    #                 })
-            
-    #         return {
-    #             "attribute": attribute_name,
-    #             "predictions": results
-    #         }
-            
-    #     except Exception as e:
-    #         logger.error(f"Error in CLIP classification for {attribute_name}: {str(e)}")
-    #         return {"attribute": attribute_name, "predictions": []}
-    
 
     def classify_with_clip(
         self,
@@ -653,14 +263,12 @@ class VisualProcessingService:
             model, processor = self._get_clip_model()
             device = self._get_device()
             
-            # ⚡ OPTIMIZATION: Process in smaller batches to avoid memory issues
-            batch_size = 16  # Process 16 candidates at a time
+            batch_size = 16
             all_results = []
             
             for i in range(0, len(candidates), batch_size):
                 batch_candidates = candidates[i:i + batch_size]
                 
-                # Prepare inputs WITHOUT progress bars
                 inputs = processor(
                     text=batch_candidates,
                     images=image,
@@ -668,16 +276,13 @@ class VisualProcessingService:
                     padding=True
                 )
                 
-                # Move to device
                 inputs = {k: v.to(device) for k, v in inputs.items()}
                 
-                # Get predictions
                 with torch.no_grad():
                     outputs = model(**inputs)
                     logits_per_image = outputs.logits_per_image
                     probs = logits_per_image.softmax(dim=1).cpu()
                 
-                # Collect results from this batch
                 for j, prob in enumerate(probs[0]):
                     if prob.item() > confidence_threshold:
                         all_results.append({
@@ -685,7 +290,6 @@ class VisualProcessingService:
                             "confidence": round(float(prob.item()), 3)
                         })
             
-            # Sort by confidence and return top 3
             all_results.sort(key=lambda x: x['confidence'], reverse=True)
             
             return {
@@ -697,16 +301,11 @@ class VisualProcessingService:
             logger.error(f"Error in CLIP classification for {attribute_name}: {str(e)}")
             return {"attribute": attribute_name, "predictions": []}
 
     def detect_category_and_subcategory(self, image: Image.Image) -> Tuple[str, str, str, float]:
         """
         Hierarchically detect category, subcategory, and specific product.
         Returns: (category, subcategory, product_type, confidence)
         """
-        # Step 1: Detect if it's clothing or something else
         main_categories = list(self.CATEGORY_ATTRIBUTES.keys())
         category_prompts = [f"a photo of {cat}" for cat in main_categories]
         
@@ -720,11 +319,9 @@ class VisualProcessingService:
         
         logger.info(f"Step 1 - Main category detected: {detected_category} (confidence: {category_confidence:.3f})")
         
-        # Step 2: For clothing, detect subcategory (tops/bottoms/dresses/outerwear)
         if detected_category == "clothing":
             subcategories = self.CATEGORY_ATTRIBUTES["clothing"]["subcategories"]
             
-            # Collect all products grouped by subcategory
             all_products = []
             product_to_subcategory = {}
             
@@ -734,7 +331,6 @@ class VisualProcessingService:
                     all_products.append(prompt)
                     product_to_subcategory[prompt] = subcat
             
-            # Step 3: Detect specific product type
             product_result = self.classify_with_clip(
                 image, 
                 all_products, 
@@ -755,11 +351,9 @@ class VisualProcessingService:
                 logger.warning("Could not detect specific product type for clothing")
                 return detected_category, "unknown", "unknown", category_confidence
         
-        # Step 3: For non-clothing categories, just detect product type
         else:
             category_data = self.CATEGORY_ATTRIBUTES[detected_category]
             
-            # Check if this category has subcategories or direct products
             if "products" in category_data:
                 products = category_data["products"]
                 product_prompts = [f"a photo of {p}" for p in products]
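For clarity, the label bookkeeping used by both branches amounts to the following (toy data; the prompt template is illustrative, and the clothing branch additionally remembers which subcategory each prompt came from):

subcategories = {"tops": ["t-shirt", "blouse"], "bottoms": ["jeans", "skirt"]}
all_products, product_to_subcategory = [], {}
for subcat, products in subcategories.items():
    for product in products:
        prompt = f"a photo of a {product}"
        all_products.append(prompt)
        product_to_subcategory[prompt] = subcat
# product_to_subcategory["a photo of a jeans"] == "bottoms"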
@@ -793,7 +387,6 @@ class VisualProcessingService:
         start_time = time.time()
         
         try:
-            # Download image
             image = self.download_image(image_url)
             if image is None:
                 return {
@@ -804,10 +397,8 @@ class VisualProcessingService:
             visual_attributes = {}
             detailed_predictions = {}
             
-            # Step 1: Detect category, subcategory, and product type
             category, subcategory, product_type, confidence = self.detect_category_and_subcategory(image)
             
-            # Low confidence check
             if confidence < 0.10:
                 logger.warning(f"Low confidence in detection ({confidence:.3f}). Returning basic attributes only.")
                 colors = self.extract_dominant_colors(image, n_colors=3)
@@ -822,13 +413,11 @@ class VisualProcessingService:
                     "processing_time": round(time.time() - start_time, 2)
                 }
             
-            # Add detected metadata
             visual_attributes["product_type"] = product_type
             visual_attributes["category"] = category
             if subcategory != "none" and subcategory != "unknown":
                 visual_attributes["subcategory"] = subcategory
             
-            # Step 2: Extract color information (universal)
             colors = self.extract_dominant_colors(image, n_colors=3)
             if colors:
                 visual_attributes["primary_color"] = colors[0]["name"]
@@ -838,7 +427,6 @@ class VisualProcessingService:
                     for c in colors
                 ]
             
-            # Step 3: Get the right attribute configuration based on subcategory
             attributes_config = None
             
             if category == "clothing":
@@ -853,7 +441,6 @@ class VisualProcessingService:
                     attributes_config = self.CATEGORY_ATTRIBUTES[category]["attributes"]
                     logger.info(f"Using attributes for category: {category}")
             
-            # Step 4: Extract category-specific attributes
             if attributes_config:
                 for attr_name, attr_values in attributes_config.items():
                     result = self.classify_with_clip(
@@ -865,11 +452,9 @@ class VisualProcessingService:
                     
                     if result["predictions"]:
                         best_prediction = result["predictions"][0]
-                        # Only add attributes with reasonable confidence
                         if best_prediction["confidence"] > 0.20:
                             visual_attributes[attr_name] = best_prediction["value"]
                         
-                        # Store detailed predictions for debugging
                         detailed_predictions[attr_name] = result
             
             processing_time = time.time() - start_time
@@ -880,7 +465,8 @@ class VisualProcessingService:
                 "visual_attributes": visual_attributes,
                 "detailed_predictions": detailed_predictions,
                 "detection_confidence": confidence,
-                "processing_time": round(processing_time, 2)
+                "processing_time": round(processing_time, 2),
+                "cache_status": "enabled" if ENABLE_CLIP_MODEL_CACHE else "disabled"
             }
             
         except Exception as e:
@@ -889,1202 +475,4 @@ class VisualProcessingService:
                 "visual_attributes": {},
                 "error": str(e),
                 "processing_time": round(time.time() - start_time, 2)
-            }
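End to end, a successful call now also reports whether the model cache was active. A response has roughly this shape (values illustrative):

example_response = {
    "visual_attributes": {
        "product_type": "t-shirt",
        "category": "clothing",
        "subcategory": "tops",
        "primary_color": "blue",
    },
    "detailed_predictions": {},     # per-attribute top predictions, for debugging
    "detection_confidence": 0.41,
    "processing_time": 2.7,
    "cache_status": "disabled",     # mirrors ENABLE_CLIP_MODEL_CACHE
}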
-# # ==================== visual_processing_service_enhanced.py ====================
-# """
-# Enhanced Visual Processing Service combining CLIP's speed with BLIP-2's comprehensive taxonomy.
-
-# Features:
-# - Fast CLIP-based classification
-# - 70+ product categories across multiple domains
-# - Two-stage classification with validation
-# - Enhanced color normalization
-# - Category-specific attribute detection
-# - Confidence-based fallback mechanisms
-# - Optional center cropping for better focus
-
-# Usage:
-#     service = VisualProcessingService()
-#     result = service.process_image("https://example.com/product.jpg")
-# """
-
-# import torch
-# import cv2
-# import numpy as np
-# import requests
-# from io import BytesIO
-# from PIL import Image
-# from typing import Dict, List, Optional, Tuple
-# import logging
-# from transformers import CLIPProcessor, CLIPModel
-# from sklearn.cluster import KMeans
-
-# logger = logging.getLogger(__name__)
-
-
-# class VisualProcessingService:
-#     """Enhanced service for extracting visual attributes from product images using CLIP."""
-    
-#     # Class-level caching (shared across instances)
-#     _clip_model = None
-#     _clip_processor = None
-#     _device = None
-    
-#     # ==================== EXPANDED TAXONOMY ====================
-    
-#     # Base color vocabulary
-#     COLORS = ["black", "white", "red", "blue", "green", "yellow", "gray", 
-#               "brown", "pink", "purple", "orange", "beige", "navy", "teal"]
-    
-#     # Pattern vocabulary
-#     PATTERNS = ["solid", "striped", "checked", "plaid", "floral", "graphic", 
-#                 "polka dot", "camo", "tie-dye", "abstract", "geometric"]
-    
-#     # Material vocabulary (extended)
-#     MATERIALS = ["cotton", "polyester", "denim", "leather", "wool", "canvas", 
-#                  "silicone", "metal", "fabric", "rubber", "plastic", "wood", 
-#                  "glass", "ceramic", "steel", "foam", "aluminum", "carbon fiber"]
-    
-#     # Style vocabulary
-#     STYLES = ["casual", "formal", "sporty", "streetwear", "elegant", "vintage", 
-#               "modern", "bohemian", "minimalist", "industrial", "rustic", "contemporary"]
-    
-#     # Fit vocabulary
-#     FITS = ["slim fit", "regular fit", "loose fit", "oversized", "tailored", 
-#             "relaxed", "athletic fit"]
-    
-#     # Brand vocabulary (common marketplace brands)
-#     BRANDS = ["nike", "adidas", "sony", "samsung", "apple", "generic", "lego", 
-#               "hasbro", "lg", "panasonic", "microsoft"]
-    
-#     # Age group vocabulary
-#     AGE_GROUPS = ["baby", "toddler", "child", "teen", "adult", "all ages"]
-    
-#     # Comprehensive category-specific attributes
-#     CATEGORY_ATTRIBUTES = {
-#         # ==================== CLOTHING ====================
-#         "clothing": {
-#             "products": ["t-shirt", "shirt", "dress", "pants", "jeans", "shorts", 
-#                         "skirt", "jacket", "coat", "sweater", "hoodie", "top", 
-#                         "blouse", "cardigan", "blazer"],
-#             "attributes": {
-#                 "color": COLORS,
-#                 "pattern": PATTERNS,
-#                 "material": ["cotton", "polyester", "denim", "leather", "silk", 
-#                             "wool", "linen", "blend", "canvas"],
-#                 "style": STYLES,
-#                 "fit": FITS,
-#                 "neckline": ["crew neck", "v-neck", "round neck", "collar", 
-#                             "scoop neck", "boat neck", "turtleneck"],
-#                 "sleeve_type": ["short sleeve", "long sleeve", "sleeveless", 
-#                                "3/4 sleeve", "cap sleeve"],
-#                 "closure_type": ["button", "zipper", "pull-on", "snap", "tie", "buckle"]
-#             }
-#         },
-        
-#         # ==================== FOOTWEAR ====================
-#         "footwear": {
-#             "products": ["shoes", "sneakers", "sandals", "boots", "slippers", 
-#                         "heels", "loafers"],
-#             "attributes": {
-#                 "color": COLORS,
-#                 "material": ["leather", "synthetic", "canvas", "rubber", "suede", "fabric"],
-#                 "type": ["sneakers", "sandals", "formal", "boots", "sports", "casual"],
-#                 "style": STYLES,
-#                 "closure_type": ["lace-up", "slip-on", "velcro", "zipper", "buckle"]
-#             }
-#         },
-        
-#         # ==================== ACCESSORIES ====================
-#         "accessories": {
-#             "products": ["watch", "bag", "backpack", "handbag", "wallet", "belt", 
-#                         "sunglasses", "hat", "scarf"],
-#             "attributes": {
-#                 "color": COLORS,
-#                 "material": ["leather", "fabric", "metal", "plastic", "canvas", "synthetic"],
-#                 "style": STYLES,
-#                 "type": ["backpack", "tote", "crossbody", "messenger", "duffel"]
-#             }
-#         },
-        
-#         # ==================== JEWELRY ====================
-#         "jewelry": {
-#             "products": ["necklace", "ring", "bracelet", "earrings", "pendant", "chain"],
-#             "attributes": {
-#                 "material": ["gold", "silver", "platinum", "stainless steel", 
-#                             "plastic", "beads", "leather"],
-#                 "style": ["modern", "vintage", "minimalist", "statement", "elegant"],
-#                 "type": ["chain", "band", "solitaire", "hoop", "stud"]
-#             }
-#         },
-        
-#         # ==================== ELECTRONICS ====================
-#         "electronics": {
-#             "products": ["phone", "smartphone", "tablet", "laptop", "headphones", 
-#                         "camera", "tv", "monitor", "keyboard", "mouse", "speaker", 
-#                         "smartwatch", "charger"],
-#             "attributes": {
-#                 "color": COLORS,
-#                 "material": ["plastic", "metal", "glass", "aluminum", "rubber"],
-#                 "style": ["modern", "minimalist", "sleek", "industrial"],
-#                 "finish": ["matte", "glossy", "metallic", "textured"],
-#                 "type": ["over-ear", "in-ear", "on-ear", "wireless", "wired"],
-#                 "brand": BRANDS
-#             }
-#         },
-        
-#         # ==================== FURNITURE ====================
-#         "furniture": {
-#             "products": ["chair", "table", "sofa", "bed", "desk", "shelf", 
-#                         "cabinet", "dresser", "bench", "stool", "bookshelf"],
-#             "attributes": {
-#                 "color": COLORS,
-#                 "material": ["wood", "metal", "glass", "plastic", "fabric", "leather"],
-#                 "style": ["modern", "traditional", "industrial", "rustic", 
-#                          "contemporary", "vintage", "minimalist"],
-#                 "finish": ["natural wood", "painted", "stained", "laminated", "upholstered"]
-#             }
-#         },
-        
-#         # ==================== HOME DECOR ====================
-#         "home_decor": {
-#             "products": ["painting", "canvas", "wall art", "frame", "vase", "lamp", 
-#                         "mirror", "clock", "sculpture", "poster", "cushion", "rug"],
-#             "attributes": {
-#                 "color": COLORS,
-#                 "style": ["modern", "abstract", "traditional", "contemporary", 
-#                          "vintage", "minimalist", "bohemian"],
-#                 "material": ["canvas", "wood", "metal", "glass", "ceramic", "paper", "fabric"],
-#                 "finish": ["glossy", "matte", "textured", "framed"],
-#                 "theme": ["nature", "geometric", "floral", "landscape", "abstract"]
-#             }
-#         },
-        
-#         # ==================== KITCHEN ====================
-#         "kitchen": {
-#             "products": ["pot", "pan", "knife", "utensil", "plate", "bowl", "cup", 
-#                         "mug", "bottle", "container", "cutting board"],
-#             "attributes": {
-#                 "material": ["stainless steel", "aluminum", "ceramic", "glass", 
-#                             "plastic", "wood", "silicone"],
-#                 "finish": ["non-stick", "stainless", "enameled", "anodized"],
-#                 "type": ["frypan", "saucepan", "chef knife", "utility", "mixing"]
-#             }
-#         },
-        
-#         # ==================== APPLIANCES ====================
-#         "appliances": {
-#             "products": ["microwave", "blender", "vacuum", "fan", "toaster", 
-#                         "coffee maker", "iron", "hair dryer"],
-#             "attributes": {
-#                 "color": COLORS,
-#                 "type": ["upright", "robot", "handheld", "ceiling", "table", "tower"],
-#                 "power": ["low", "medium", "high", "variable"],
-#                 "brand": BRANDS
-#             }
-#         },
-        
-#         # ==================== BEAUTY & PERSONAL CARE ====================
-#         "beauty": {
-#             "products": ["lipstick", "perfume", "lotion", "hair dryer", "makeup", 
-#                         "skincare", "nail polish", "shampoo"],
-#             "attributes": {
-#                 "color": COLORS,
-#                 "type": ["eau de parfum", "eau de toilette", "body spray", 
-#                         "body lotion", "face cream"],
-#                 "finish": ["matte", "glossy", "satin", "shimmer"]
-#             }
-#         },
-        
-#         # ==================== TOYS ====================
-#         "toys": {
-#             "products": ["doll", "puzzle", "board game", "action figure", "plush toy", 
-#                         "toy car", "lego", "building blocks"],
-#             "attributes": {
-#                 "color": COLORS,
-#                 "age_group": AGE_GROUPS,
-#                 "material": ["plastic", "wood", "fabric", "metal", "foam"],
-#                 "type": ["educational", "plush", "action", "vehicle", "puzzle", "board game"],
-#                 "brand": BRANDS
-#             }
-#         },
-        
-#         # ==================== SPORTS & OUTDOOR ====================
-#         "sports": {
-#             "products": ["bicycle", "football", "basketball", "tennis racket", 
-#                         "yoga mat", "helmet", "skateboard", "dumbbells", "ball"],
-#             "attributes": {
-#                 "color": COLORS,
-#                 "material": ["steel", "aluminum", "carbon fiber", "rubber", 
-#                             "leather", "synthetic", "foam", "composite"],
-#                 "sport_type": ["football", "basketball", "tennis", "cycling", 
-#                               "yoga", "gym", "outdoor", "fitness"],
-#                 "type": ["mountain", "road", "hybrid", "bmx", "indoor", "outdoor"],
-#                 "brand": BRANDS
-#             }
-#         },
-        
-#         # ==================== PET SUPPLIES ====================
-#         "pet_supplies": {
-#             "products": ["pet bed", "pet toy", "leash", "pet bowl", "collar", 
-#                         "pet carrier"],
-#             "attributes": {
-#                 "color": COLORS,
-#                 "material": ["fabric", "plastic", "metal", "nylon", "leather"],
-#                 "size": ["small", "medium", "large", "extra large"]
-#             }
-#         },
-        
-#         # ==================== BABY PRODUCTS ====================
-#         "baby": {
-#             "products": ["stroller", "baby bottle", "diaper", "crib", "high chair", 
-#                         "baby carrier"],
-#             "attributes": {
-#                 "color": COLORS,
-#                 "material": MATERIALS,
-#                 "type": ["full-size", "umbrella", "jogging", "disposable", "cloth"],
-#                 "age_group": ["newborn", "baby", "toddler"]
-#             }
-#         },
-        
-#         # ==================== TOOLS & HARDWARE ====================
-#         "tools": {
-#             "products": ["hammer", "drill", "screwdriver", "wrench", "saw", 
-#                         "pliers", "measuring tape", "level"],
-#             "attributes": {
-#                 "material": ["steel", "aluminum", "plastic", "wood", "rubber", 
-#                             "chrome", "fiberglass"],
-#                 "type": ["manual", "electric", "cordless", "corded", "pneumatic"],
-#                 "finish": ["chrome plated", "powder coated", "stainless steel"],
-#                 "brand": BRANDS
-#             }
-#         },
-        
-#         # ==================== BOOKS & MEDIA ====================
-#         "books_media": {
-#             "products": ["book", "magazine", "dvd", "video game", "cd", "vinyl"],
-#             "attributes": {
-#                 "type": ["paperback", "hardcover", "ebook", "audiobook"],
-#                 "genre": ["fiction", "non-fiction", "educational", "kids", 
-#                          "action", "adventure", "sports", "rpg"]
-#             }
-#         },
-        
-#         # ==================== AUTOMOTIVE ====================
-#         "automotive": {
-#             "products": ["car accessory", "tire", "car seat", "steering wheel cover", 
-#                         "floor mat"],
-#             "attributes": {
-#                 "color": COLORS,
-#                 "material": ["rubber", "plastic", "fabric", "leather", "vinyl"],
-#                 "type": ["universal", "custom fit"]
-#             }
-#         },
-        
-#         # ==================== OFFICE SUPPLIES ====================
-#         "office": {
-#             "products": ["pen", "notebook", "folder", "desk organizer", "stapler", 
-#                         "calculator", "paper"],
-#             "attributes": {
-#                 "color": COLORS,
-#                 "material": ["paper", "plastic", "metal", "cardboard"],
-#                 "type": ["ruled", "blank", "grid", "dot grid"]
-#             }
-#         },
-        
-#         # ==================== GARDEN & OUTDOOR ====================
-#         "garden": {
-#             "products": ["plant pot", "garden tool", "watering can", "planter", 
-#                         "garden hose", "lawn mower"],
-#             "attributes": {
-#                 "color": COLORS,
-#                 "material": ["ceramic", "plastic", "metal", "terracotta", "wood"],
-#                 "type": ["indoor", "outdoor", "hanging", "standing"]
-#             }
-#         }
-#     }
-    
-#     # Attribute-specific confidence thresholds
-#     CONFIDENCE_THRESHOLDS = {
-#         "color": 0.20,
-#         "pattern": 0.25,
-#         "material": 0.30,
-#         "style": 0.20,
-#         "fit": 0.25,
-#         "brand": 0.40,
-#         "type": 0.22,
-#         "finish": 0.28,
-#         "neckline": 0.23,
-#         "sleeve_type": 0.23
-#     }
-    
-#     def __init__(self):
-#         pass
-    
-#     @classmethod
-#     def _get_device(cls):
-#         """Get optimal device."""
-#         if cls._device is None:
-#             cls._device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-#             logger.info(f"Visual Processing using device: {cls._device}")
-#         return cls._device
-    
-#     @classmethod
-#     def _get_clip_model(cls):
-#         """Lazy load CLIP model with class-level caching."""
-#         if cls._clip_model is None:
-#             logger.info("Loading CLIP model (this may take a few minutes on first use)...")
-#             cls._clip_model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32")
-#             cls._clip_processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")
-            
-#             device = cls._get_device()
-#             cls._clip_model.to(device)
-#             cls._clip_model.eval()
-            
-#             logger.info("✓ CLIP model loaded successfully")
-#         return cls._clip_model, cls._clip_processor
-    
-#     def center_crop(self, image: Image.Image, rel_crop: float = 0.7) -> Image.Image:
-#         """
-#         Center-crop to focus on the product area if there is too much background.
-        
-#         Args:
-#             image: PIL Image
-#             rel_crop: Relative crop size (0.7 = 70% of min dimension)
-#         """
-#         w, h = image.size
-#         side = int(min(w, h) * rel_crop)
-#         left = (w - side) // 2
-#         top = (h - side) // 2
-#         return image.crop((left, top, left + side, top + side))
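Worked numbers for the crop above: an 800x600 image with rel_crop=0.7 gives side = int(600 * 0.7) = 420, left = (800 - 420) // 2 = 190, top = (600 - 420) // 2 = 90, i.e. a 420x420 center box at (190, 90).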
-    
-#     def download_image(self, image_url: str, apply_crop: bool = False, 
-#                       max_size: Tuple[int, int] = (1024, 1024)) -> Optional[Image.Image]:
-#         """
-#         Download image from URL with optional preprocessing.
-        
-#         Args:
-#             image_url: URL of the image
-#             apply_crop: Whether to apply center crop
-#             max_size: Maximum dimensions for resizing
-#         """
-#         try:
-#             response = requests.get(image_url, timeout=10)
-#             response.raise_for_status()
-#             image = Image.open(BytesIO(response.content)).convert('RGB')
-            
-#             # Resize if too large
-#             if image.size[0] > max_size[0] or image.size[1] > max_size[1]:
-#                 image.thumbnail(max_size, Image.Resampling.LANCZOS)
-            
-#             # Optional center crop
-#             if apply_crop:
-#                 image = self.center_crop(image, rel_crop=0.7)
-            
-#             return image
-#         except Exception as e:
-#             logger.error(f"Error downloading image from {image_url}: {str(e)}")
-#             return None
-    
-#     def normalize_color(self, word: str) -> str:
-#         """
-#         Enhanced color normalization with aliases and modifiers.
-        
-#         Args:
-#             word: Color word to normalize
-#         """
-#         w = word.lower().strip()
-        
-#         # Remove light/dark modifiers
-#         w = w.replace("light ", "").replace("dark ", "")
-#         w = w.replace("bright ", "").replace("pale ", "")
-        
-#         # Alias mapping
-#         aliases = {
-#             "grey": "gray",
-#             "navy": "blue",
-#             "navy blue": "blue",
-#             "maroon": "red",
-#             "crimson": "red",
-#             "scarlet": "red",
-#             "teal": "green",
-#             "turquoise": "blue",
-#             "cyan": "blue",
-#             "indigo": "blue",
-#             "violet": "purple",
-#             "lavender": "purple",
-#             "magenta": "pink",
-#             "off white": "white",
-#             "off-white": "white",
-#             "cream": "beige",
-#             "ivory": "white",
-#             "khaki": "beige",
-#             "tan": "brown",
-#             "bronze": "brown",
-#             "gold": "yellow",
-#             "silver": "gray",
-#             "charcoal": "gray"
-#         }
-        
-#         normalized = aliases.get(w, w)
-        
-#         # Validate against canonical colors
-#         if normalized not in [c.lower() for c in self.COLORS]:
-#             # Try first word if it's a compound
-#             first_word = normalized.split()[0] if ' ' in normalized else normalized
-#             if first_word in [c.lower() for c in self.COLORS]:
-#                 return first_word
-        
-#         return normalized
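A few toy traces of this normalization: "dark navy" -> strip "dark " -> "navy" -> alias -> "blue"; "off-white" -> alias -> "white"; "light grey" -> strip "light " -> "grey" -> alias -> "gray".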
-    
-#     def extract_dominant_colors(self, image: Image.Image, n_colors: int = 3) -> List[Dict]:
-#         """Extract dominant colors using K-means clustering."""
-#         try:
-#             # Resize for faster processing
-#             img_small = image.resize((150, 150))
-#             img_array = np.array(img_small)
-#             pixels = img_array.reshape(-1, 3)
-            
-#             # Sample if too many pixels
-#             if len(pixels) > 10000:
-#                 indices = np.random.choice(len(pixels), 10000, replace=False)
-#                 pixels = pixels[indices]
-            
-#             # K-means clustering
-#             kmeans = KMeans(n_clusters=n_colors, random_state=42, n_init=5, max_iter=100)
-#             kmeans.fit(pixels)
-            
-#             colors = []
-#             labels_counts = np.bincount(kmeans.labels_)
-            
-#             for i, center in enumerate(kmeans.cluster_centers_):
-#                 rgb = tuple(center.astype(int))
-#                 color_name = self._get_color_name_simple(rgb)
-#                 percentage = float(labels_counts[i] / len(kmeans.labels_) * 100)
-                
-#                 colors.append({
-#                     "name": color_name,
-#                     "rgb": rgb,
-#                     "percentage": percentage
-#                 })
-            
-#             colors.sort(key=lambda x: x['percentage'], reverse=True)
-#             return colors
-            
-#         except Exception as e:
-#             logger.error(f"Error extracting colors: {str(e)}")
-#             return []
-    
-#     def _get_color_name_simple(self, rgb: Tuple[int, int, int]) -> str:
-#         """Simple RGB to color name mapping."""
-#         r, g, b = rgb
-        
-#         # Define color ranges
-#         colors = {
-#             'black': (r < 50 and g < 50 and b < 50),
-#             'white': (r > 200 and g > 200 and b > 200),
-#             'gray': (abs(r - g) < 30 and abs(g - b) < 30 and abs(r - b) < 30 and 50 <= r <= 200),
-#             'red': (r > 150 and g < 100 and b < 100),
-#             'green': (g > 150 and r < 100 and b < 100),
-#             'blue': (b > 150 and r < 100 and g < 100),
-#             'yellow': (r > 200 and g > 200 and b < 100),
-#             'orange': (r > 200 and 100 < g < 200 and b < 100),
-#             'purple': (r > 100 and b > 100 and g < 100),
-#             'pink': (r > 200 and 100 < g < 200 and 100 < b < 200),
-#             'brown': (50 < r < 150 and 30 < g < 100 and b < 80),
-#             'beige': (150 < r < 220 and 140 < g < 200 and 100 < b < 180),
-#         }
-        
-#         for color_name, condition in colors.items():
-#             if condition:
-#                 return color_name
-        
-#         # Fallback based on dominant channel
-#         if r > g and r > b:
-#             return 'red'
-#         elif g > r and g > b:
-#             return 'green'
-#         elif b > r and b > g:
-#             return 'blue'
-#         else:
-#             return 'gray'
-    
-#     def classify_with_clip(
-#         self,
-#         image: Image.Image,
-#         candidates: List[str],
-#         attribute_name: str,
-#         confidence_threshold: Optional[float] = None
-#     ) -> Dict:
-#         """
-#         Use CLIP to classify image against candidate labels.
-        
-#         Args:
-#             image: PIL Image
-#             candidates: List of text labels to classify against
-#             attribute_name: Name of the attribute being classified
-#             confidence_threshold: Override default threshold
-#         """
-#         try:
-#             model, processor = self._get_clip_model()
-#             device = self._get_device()
-            
-#             # Use attribute-specific threshold if not provided
-#             if confidence_threshold is None:
-#                 confidence_threshold = self.CONFIDENCE_THRESHOLDS.get(attribute_name, 0.20)
-            
-#             # Prepare inputs
-#             inputs = processor(
-#                 text=candidates,
-#                 images=image,
-#                 return_tensors="pt",
-#                 padding=True
-#             )
-            
-#             # Move to device
-#             inputs = {k: v.to(device) for k, v in inputs.items()}
-            
-#             # Get predictions
-#             with torch.no_grad():
-#                 outputs = model(**inputs)
-#                 logits_per_image = outputs.logits_per_image
-#                 probs = logits_per_image.softmax(dim=1).cpu()
-            
-#             # Get top predictions
-#             top_k = min(3, len(candidates))
-#             top_probs, top_indices = torch.topk(probs[0], k=top_k)
-            
-#             results = []
-#             for prob, idx in zip(top_probs, top_indices):
-#                 if prob.item() > confidence_threshold:
-#                     value = candidates[idx.item()]
-#                     # Apply color normalization if color attribute
-#                     if attribute_name == "color":
-#                         value = self.normalize_color(value)
-#                     results.append({
-#                         "value": value,
-#                         "confidence": float(prob.item())
-#                     })
-            
-#             return {
-#                 "attribute": attribute_name,
-#                 "predictions": results
-#             }
-            
-#         except Exception as e:
-#             logger.error(f"Error in CLIP classification for {attribute_name}: {str(e)}")
-#             return {"attribute": attribute_name, "predictions": []}
-    
-#     def detect_category_hierarchical(self, image: Image.Image) -> Tuple[str, str, float]:
-#         """
-#         Two-stage hierarchical product detection:
-#         1. Detect broad category
-#         2. Detect specific product within that category
-        
-#         Returns:
-#             (category, product_type, confidence)
-#         """
-#         # Stage 1: Detect broad category
-#         category_names = list(self.CATEGORY_ATTRIBUTES.keys())
-#         category_labels = [f"a photo of {cat.replace('_', ' ')}" for cat in category_names]
-        
-#         category_result = self.classify_with_clip(
-#             image, category_labels, "category_detection", confidence_threshold=0.15
-#         )
-        
-#         if not category_result["predictions"]:
-#             return "unknown", "unknown", 0.0
-        
-#         # Extract category
-#         best_category_match = category_result["predictions"][0]
-#         detected_category = category_names[category_labels.index(best_category_match["value"])]
-#         category_confidence = best_category_match["confidence"]
-        
-#         # Stage 2: Detect specific product within category
-#         products_in_category = self.CATEGORY_ATTRIBUTES[detected_category]["products"]
-#         product_labels = [f"a photo of a {p}" for p in products_in_category]
-        
-#         product_result = self.classify_with_clip(
-#             image, product_labels, "product_detection", confidence_threshold=0.15
-#         )
-        
-#         if product_result["predictions"]:
-#             best_product = product_result["predictions"][0]
-#             product_type = products_in_category[product_labels.index(best_product["value"])]
-#             product_confidence = best_product["confidence"]
-            
-#             # Combined confidence (geometric mean for balance)
-#             combined_confidence = (category_confidence * product_confidence) ** 0.5
-            
-#             logger.info(f"Detected: {detected_category} → {product_type} (confidence: {combined_confidence:.3f})")
-#             return detected_category, product_type, combined_confidence
-        
-#         return detected_category, "unknown", category_confidence * 0.5
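Worked example of the combined score: category confidence 0.6 and product confidence 0.4 give sqrt(0.6 * 0.4) ≈ 0.49, which penalizes one weak stage less harshly than plain multiplication (0.24) while still reflecting both stages.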
-    
-#     def detect_category_flat(self, image: Image.Image) -> Tuple[str, str, float]:
-#         """
-#         Single-stage flat product detection across all categories.
-#         Faster but potentially less accurate.
-        
-#         Returns:
-#             (category, product_type, confidence)
-#         """
-#         # Collect all products with their categories
-#         all_products = []
-#         product_to_category = {}
-        
-#         for category, data in self.CATEGORY_ATTRIBUTES.items():
-#             for product in data["products"]:
-#                 label = f"a photo of a {product}"
-#                 all_products.append(label)
-#                 product_to_category[label] = category
-        
-#         # Classify
-#         result = self.classify_with_clip(
-#             image, all_products, "product_detection", confidence_threshold=0.15
-#         )
-        
-#         if result["predictions"]:
-#             best_match = result["predictions"][0]
-#             product_label = best_match["value"]
-#             category = product_to_category[product_label]
-#             product_type = product_label.replace("a photo of a ", "")
-#             confidence = best_match["confidence"]
-            
-#             logger.info(f"Detected: {category} → {product_type} (confidence: {confidence:.3f})")
-#             return category, product_type, confidence
-        
-#         return "unknown", "unknown", 0.0
-    
-#     def process_image(
-#         self,
-#         image_url: str,
-#         product_type_hint: Optional[str] = None,
-#         apply_crop: bool = False,
-#         detection_mode: str = "hierarchical"
-#     ) -> Dict:
-#         """
-#         Main method to process image and extract visual attributes.
-        
-#         Args:
-#             image_url: URL of the product image
-#             product_type_hint: Optional hint about product type
-#             apply_crop: Whether to apply center crop for better focus
-#             detection_mode: "hierarchical" (slower, more accurate) or "flat" (faster)
-#         """
-#         import time
-#         start_time = time.time()
-        
-#         try:
-#             # Download image
-#             image = self.download_image(image_url, apply_crop=apply_crop)
-#             if image is None:
-#                 return {
-#                     "visual_attributes": {},
-#                     "error": "Failed to download image"
-#                 }
-            
-#             visual_attributes = {}
-#             detailed_predictions = {}
-            
-#             # Step 1: Detect product category and type
-#             if detection_mode == "hierarchical":
-#                 detected_category, detected_product_type, category_confidence = \
-#                     self.detect_category_hierarchical(image)
-#             else:
-#                 detected_category, detected_product_type, category_confidence = \
-#                     self.detect_category_flat(image)
-            
-#             # If confidence is too low, return minimal info
-#             if category_confidence < 0.12:
-#                 logger.warning(f"Low confidence ({category_confidence:.3f}). Returning basic attributes only.")
-#                 colors = self.extract_dominant_colors(image, n_colors=3)
-#                 if colors:
-#                     visual_attributes["primary_color"] = colors[0]["name"]
-#                     visual_attributes["color_palette"] = [c["name"] for c in colors]
-                
-#                 return {
-#                     "visual_attributes": visual_attributes,
-#                     "category_confidence": category_confidence,
-#                     "processing_time": round(time.time() - start_time, 2),
-#                     "warning": "Low confidence detection"
-#                 }
-            
-#             # Add detected information
-#             visual_attributes["product_type"] = detected_product_type
-#             visual_attributes["category"] = detected_category
-#             visual_attributes["detection_confidence"] = round(category_confidence, 3)
-            
-#             # Step 2: Extract universal color attribute
-#             colors = self.extract_dominant_colors(image, n_colors=3)
-#             if colors:
-#                 visual_attributes["primary_color"] = colors[0]["name"]
-#                 visual_attributes["color_palette"] = [c["name"] for c in colors]
-#                 visual_attributes["color_distribution"] = [
-#                     {"name": c["name"], "percentage": round(c["percentage"], 1)} 
-#                     for c in colors
-#                 ]
-            
-#             # Step 3: Extract category-specific attributes
-#             if detected_category in self.CATEGORY_ATTRIBUTES:
-#                 category_config = self.CATEGORY_ATTRIBUTES[detected_category]
-                
-#                 for attr_name, attr_values in category_config["attributes"].items():
-#                     # Skip color since we already extracted it
-#                     if attr_name == "color":
-#                         continue
-                    
-#                     # Get attribute-specific threshold
-#                     threshold = self.CONFIDENCE_THRESHOLDS.get(attr_name, 0.20)
-                    
-#                     # Classify
-#                     result = self.classify_with_clip(
-#                         image, attr_values, attr_name, confidence_threshold=threshold
-#                     )
-                    
-#                     detailed_predictions[attr_name] = result
-                    
-#                     # Only add if confidence is reasonable
-#                     if result["predictions"]:
-#                         best_prediction = result["predictions"][0]
-#                         if best_prediction["confidence"] > threshold:
-#                             visual_attributes[attr_name] = best_prediction["value"]
-            
-#             processing_time = time.time() - start_time
-            
-#             return {
-#                 "visual_attributes": visual_attributes,
-#                 "detailed_predictions": detailed_predictions,
-#                 "detection_confidence": round(category_confidence, 3),
-#                 "processing_time": round(processing_time, 2),
-#                 "metadata": {
-#                     "detection_mode": detection_mode,
-#                     "crop_applied": apply_crop,
-#                     "image_size": image.size
-#                 }
-#             }
-            
-#         except Exception as e:
-#             logger.error(f"Error processing image: {str(e)}")
-#             import traceback
-#             traceback.print_exc()
-#             return {
-#                 "visual_attributes": {},
-#                 "error": str(e),
-#                 "processing_time": round(time.time() - start_time, 2)
-#             }
-    
-#     def batch_process_images(
-#         self,
-#         image_urls: List[str],
-#         detection_mode: str = "flat"
-#     ) -> List[Dict]:
-#         """
-#         Process multiple images in batch.
-        
-#         Args:
-#             image_urls: List of image URLs
-#             detection_mode: Detection mode to use
-#         """
-#         results = []
-#         for i, url in enumerate(image_urls):
-#             logger.info(f"Processing image {i+1}/{len(image_urls)}: {url}")
-#             result = self.process_image(url, detection_mode=detection_mode)
-#             results.append(result)
-#         return results
-    
-#     @classmethod
-#     def cleanup_models(cls):
-#         """Free up memory by unloading models."""
-#         if cls._clip_model is not None:
-#             del cls._clip_model
-#             del cls._clip_processor
-#             cls._clip_model = None
-#             cls._clip_processor = None
-            
-#             if torch.cuda.is_available():
-#                 torch.cuda.empty_cache()
-            
-#             logger.info("Models unloaded and memory freed")
-    
-#     def get_supported_categories(self) -> List[str]:
-#         """Get list of all supported product categories."""
-#         return list(self.CATEGORY_ATTRIBUTES.keys())
-    
-#     def get_category_products(self, category: str) -> List[str]:
-#         """Get list of products in a specific category."""
-#         return self.CATEGORY_ATTRIBUTES.get(category, {}).get("products", [])
-    
-#     def get_category_attributes(self, category: str) -> Dict[str, List[str]]:
-#         """Get attribute schema for a specific category."""
-#         return self.CATEGORY_ATTRIBUTES.get(category, {}).get("attributes", {})
-    
-#     def get_statistics(self) -> Dict:
-#         """Get statistics about the taxonomy."""
-#         total_products = sum(
-#             len(data["products"]) 
-#             for data in self.CATEGORY_ATTRIBUTES.values()
-#         )
-#         total_attributes = sum(
-#             len(data["attributes"]) 
-#             for data in self.CATEGORY_ATTRIBUTES.values()
-#         )
-        
-#         return {
-#             "total_categories": len(self.CATEGORY_ATTRIBUTES),
-#             "total_products": total_products,
-#             "total_unique_attributes": len(set(
-#                 attr 
-#                 for data in self.CATEGORY_ATTRIBUTES.values() 
-#                 for attr in data["attributes"].keys()
-#             )),
-#             "categories": list(self.CATEGORY_ATTRIBUTES.keys())
-#         }
-
-
-# # ==================== USAGE EXAMPLES ====================
-
-# def example_basic_usage():
-#     """Basic usage example."""
-#     print("=== Basic Usage Example ===\n")
-    
-#     # Initialize service
-#     service = VisualProcessingService()
-    
-#     # Process single image (hierarchical mode - more accurate)
-#     result = service.process_image(
-#         "https://example.com/product.jpg",
-#         detection_mode="hierarchical"
-#     )
-    
-#     print("Product Type:", result["visual_attributes"].get("product_type"))
-#     print("Category:", result["visual_attributes"].get("category"))
-#     print("Primary Color:", result["visual_attributes"].get("primary_color"))
-#     print("Detection Confidence:", result.get("detection_confidence"))
-#     print("Processing Time:", result["processing_time"], "seconds")
-#     print("\nAll Attributes:")
-#     for key, value in result["visual_attributes"].items():
-#         print(f"  {key}: {value}")
-
-
-# def example_fast_mode():
-#     """Fast processing mode example."""
-#     print("\n=== Fast Mode Example ===\n")
-    
-#     service = VisualProcessingService()
-    
-#     # Fast mode (flat detection)
-#     result = service.process_image(
-#         "https://example.com/product.jpg",
-#         detection_mode="flat"  # Faster, single-stage detection
-#     )
-    
-#     print("Processing Time:", result["processing_time"], "seconds")
-#     print("Detected:", result["visual_attributes"])
-
-
-# def example_with_cropping():
-#     """Example with center cropping for busy backgrounds."""
-#     print("\n=== With Center Cropping ===\n")
-    
-#     service = VisualProcessingService()
-    
-#     # Apply center crop to focus on product
-#     result = service.process_image(
-#         "https://example.com/product-with-background.jpg",
-#         apply_crop=True,  # Enable center cropping
-#         detection_mode="hierarchical"
-#     )
-    
-#     print("Crop Applied:", result["metadata"]["crop_applied"])
-#     print("Detected:", result["visual_attributes"])
-
-
-# def example_batch_processing():
-#     """Batch processing example."""
-#     print("\n=== Batch Processing ===\n")
-    
-#     service = VisualProcessingService()
-    
-#     image_urls = [
-#         "https://example.com/product1.jpg",
-#         "https://example.com/product2.jpg",
-#         "https://example.com/product3.jpg"
-#     ]
-    
-#     results = service.batch_process_images(image_urls, detection_mode="flat")
-    
-#     for i, result in enumerate(results):
-#         print(f"\nProduct {i+1}:")
-#         print(f"  Type: {result['visual_attributes'].get('product_type')}")
-#         print(f"  Category: {result['visual_attributes'].get('category')}")
-#         print(f"  Time: {result['processing_time']}s")
-
-
-# def example_category_info():
-#     """Get information about supported categories."""
-#     print("\n=== Category Information ===\n")
-    
-#     service = VisualProcessingService()
-    
-#     # Get statistics
-#     stats = service.get_statistics()
-#     print("Statistics:")
-#     print(f"  Total Categories: {stats['total_categories']}")
-#     print(f"  Total Products: {stats['total_products']}")
-#     print(f"  Unique Attributes: {stats['total_unique_attributes']}")
-    
-#     # Get all categories
-#     categories = service.get_supported_categories()
-#     print(f"\nSupported Categories ({len(categories)}):")
-#     for cat in categories:
-#         products = service.get_category_products(cat)
-#         print(f"  {cat}: {len(products)} products")
-    
-#     # Get attributes for a specific category
-#     print("\nClothing Category Attributes:")
-#     clothing_attrs = service.get_category_attributes("clothing")
-#     for attr, values in clothing_attrs.items():
-#         print(f"  {attr}: {len(values)} options")
-
-
-# def example_detailed_predictions():
-#     """Example showing detailed predictions with confidence scores."""
-#     print("\n=== Detailed Predictions ===\n")
-    
-#     service = VisualProcessingService()
-    
-#     result = service.process_image(
-#         "https://example.com/product.jpg",
-#         detection_mode="hierarchical"
-#     )
-    
-#     print("Visual Attributes (Best Predictions):")
-#     for key, value in result["visual_attributes"].items():
-#         print(f"  {key}: {value}")
-    
-#     print("\nDetailed Predictions (Top 3 for each attribute):")
-#     for attr_name, predictions in result.get("detailed_predictions", {}).items():
-#         print(f"\n  {attr_name}:")
-#         for pred in predictions.get("predictions", []):
-#             print(f"    - {pred['value']}: {pred['confidence']:.3f}")
-
-
-# def example_color_distribution():
-#     """Example showing color palette extraction."""
-#     print("\n=== Color Distribution ===\n")
-    
-#     service = VisualProcessingService()
-    
-#     result = service.process_image("https://example.com/product.jpg")
-    
-#     print("Primary Color:", result["visual_attributes"].get("primary_color"))
-#     print("\nColor Palette:")
-#     for color in result["visual_attributes"].get("color_palette", []):
-#         print(f"  - {color}")
-    
-#     print("\nColor Distribution:")
-#     for color_info in result["visual_attributes"].get("color_distribution", []):
-#         print(f"  {color_info['name']}: {color_info['percentage']}%")
-
-
-# def example_error_handling():
-#     """Example showing error handling."""
-#     print("\n=== Error Handling ===\n")
-    
-#     service = VisualProcessingService()
-    
-#     # Invalid URL
-#     result = service.process_image("https://invalid-url.com/nonexistent.jpg")
-    
-#     if "error" in result:
-#         print("Error occurred:", result["error"])
-#     else:
-#         print("Processing successful")
-    
-#     # Low confidence warning
-#     result = service.process_image("https://example.com/ambiguous-product.jpg")
-    
-#     if "warning" in result:
-#         print("Warning:", result["warning"])
-#         print("Confidence:", result.get("category_confidence"))
-
-
-# def example_cleanup():
-#     """Example showing model cleanup."""
-#     print("\n=== Model Cleanup ===\n")
-    
-#     service = VisualProcessingService()
-    
-#     # Process some images
-#     result = service.process_image("https://example.com/product.jpg")
-#     print("Processed successfully")
-    
-#     # Clean up models when done (frees memory)
-#     VisualProcessingService.cleanup_models()
-#     print("Models cleaned up and memory freed")
-
-
-# # ==================== PRODUCTION USAGE ====================
-
-# def production_example():
-#     """
-#     Production-ready example with proper error handling and logging.
-#     """
-#     import logging
-    
-#     # Setup logging (the module-level logger below is required by the calls
-#     # to logger.error / logger.warning / logger.exception further down)
-#     logging.basicConfig(
-#         level=logging.INFO,
-#         format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
-#     )
-#     logger = logging.getLogger(__name__)
-    
-#     service = VisualProcessingService()
-    
-#     def process_product_image(image_url: str, product_id: str) -> Dict:
-#         """
-#         Process a product image with full error handling.
-#         """
-#         try:
-#             # Process with hierarchical mode for best accuracy
-#             result = service.process_image(
-#                 image_url,
-#                 detection_mode="hierarchical",
-#                 apply_crop=False  # Set True if images have busy backgrounds
-#             )
-            
-#             # Check for errors
-#             if "error" in result:
-#                 logger.error(f"Failed to process {product_id}: {result['error']}")
-#                 return {
-#                     "product_id": product_id,
-#                     "status": "error",
-#                     "error": result["error"]
-#                 }
-            
-#             # Check confidence
-#             confidence = result.get("detection_confidence", 0)
-#             if confidence < 0.15:
-#                 logger.warning(f"Low confidence for {product_id}: {confidence}")
-#                 return {
-#                     "product_id": product_id,
-#                     "status": "low_confidence",
-#                     "confidence": confidence,
-#                     "partial_attributes": result["visual_attributes"]
-#                 }
-            
-#             # Success
-#             return {
-#                 "product_id": product_id,
-#                 "status": "success",
-#                 "attributes": result["visual_attributes"],
-#                 "confidence": confidence,
-#                 "processing_time": result["processing_time"]
-#             }
-            
-#         except Exception as e:
-#             logger.exception(f"Unexpected error processing {product_id}")
-#             return {
-#                 "product_id": product_id,
-#                 "status": "exception",
-#                 "error": str(e)
-#             }
-    
-#     # Process products
-#     products = [
-#         {"id": "PROD001", "image_url": "https://example.com/tshirt.jpg"},
-#         {"id": "PROD002", "image_url": "https://example.com/laptop.jpg"},
-#         {"id": "PROD003", "image_url": "https://example.com/chair.jpg"}
-#     ]
-    
-#     results = []
-#     for product in products:
-#         result = process_product_image(product["image_url"], product["id"])
-#         results.append(result)
-        
-#         # Print summary
-#         if result["status"] == "success":
-#             attrs = result["attributes"]
-#             print(f"\n✓ {product['id']} ({result['processing_time']}s):")
-#             print(f"  Type: {attrs.get('product_type')}")
-#             print(f"  Category: {attrs.get('category')}")
-#             print(f"  Color: {attrs.get('primary_color')}")
-#         else:
-#             print(f"\n✗ {product['id']}: {result['status']}")
-    
-#     return results
-
-
-# # ==================== MAIN ====================
-
-# if __name__ == "__main__":
-#     # Run examples
-#     print("Enhanced Visual Processing Service")
-#     print("=" * 60)
-    
-#     # Show statistics
-#     service = VisualProcessingService()
-#     stats = service.get_statistics()
-#     print(f"\nTaxonomy Coverage:")
-#     print(f"  Categories: {stats['total_categories']}")
-#     print(f"  Products: {stats['total_products']}")
-#     print(f"  Attributes: {stats['total_unique_attributes']}")
-    
-#     print("\n" + "=" * 60)
-#     print("Run individual examples by calling the example functions:")
-#     print("  - example_basic_usage()")
-#     print("  - example_fast_mode()")
-#     print("  - example_with_cropping()")
-#     print("  - example_batch_processing()")
-#     print("  - example_category_info()")
-#     print("  - example_detailed_predictions()")
-#     print("  - example_color_distribution()")
-#     print("  - production_example()")
-#     print("=" * 60)
+            }

+ 10 - 1
content_quality_tool/settings.py

@@ -6,6 +6,13 @@ https://docs.djangoproject.com/en/5.2/topics/settings/
 For the full list of settings and their values, see
 https://docs.djangoproject.com/en/5.2/ref/settings/
 """
+
+import sys
+import io
+# Force UTF-8 console output so non-ASCII characters (e.g. the ✓/✗ status marks
+# printed by the example code in this project) do not raise UnicodeEncodeError
+# on terminals with a narrower default encoding.
+sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8')
+sys.stderr = io.TextIOWrapper(sys.stderr.buffer, encoding='utf-8')
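+# A minimal alternative (sketch, assuming Python 3.7+, not part of this change):
+# reconfigure() keeps the original stream object, whereas the wrapper above
+# assumes stdout/stderr expose a .buffer attribute, which already-wrapped or
+# captured streams (some WSGI servers, test runners) do not.
+# sys.stdout.reconfigure(encoding='utf-8')
+# sys.stderr.reconfigure(encoding='utf-8')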
+
+
 from pathlib import Path
 import os
 from django.contrib.messages import constants as messages
@@ -122,4 +129,6 @@ MESSAGE_TAGS = {
 GROQ_API_KEY = "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"
 GROQ_API_URL = "https://api.groq.com/openai/v1/chat/completions"
 SUPPORTED_MODELS = ["llama-3.1-8b-instant", "llama-3.3-70b-versatile", "mixtral-8x7b-32768"]
-MAX_BATCH_SIZE = 100  # Maximum products per batch request
+MAX_BATCH_SIZE = 100  # Maximum products per batch request
+
+
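+# Sketch of environment-based loading for the key redacted above (an assumption,
+# not part of this commit: it presumes a GROQ_API_KEY variable is exported in the
+# deployment environment; os is already imported in this module):
+# GROQ_API_KEY = os.environ.get("GROQ_API_KEY", "")
+# if not GROQ_API_KEY:
+#     raise RuntimeError("GROQ_API_KEY environment variable is not set")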

Some files were not shown because too many files changed in this diff