# harshit.pathak / content_quality_tool
# image_scorer.py (FIXED - JSON serialization + NoneType)
import logging
from typing import Dict, List, Tuple
import numpy as np
from PIL import Image
import cv2
from sklearn.cluster import KMeans
import webcolors
import io
import os

logger = logging.getLogger(__name__)

class ImageQualityScorer:
    """
    Image Quality Scorer for Product Images
    Evaluates: resolution, blur, background, size, format
    """
    
    def __init__(self, use_ai: bool = True):
        self.use_ai = use_ai
        self.ai_service = None
        
        # Initialize AI service if available
        if use_ai:
            try:
                from .gemini_service import GeminiAttributeService
                self.ai_service = GeminiAttributeService()
            except Exception as e:
                logger.warning(f"Gemini service not available: {e}")
                self.use_ai = False
        
        # Image scoring weights
        self.image_weights = {
            'resolution': 0.25,           # 25%
            'clarity': 0.25,              # 25%
            'background': 0.20,           # 20%
            'size': 0.15,                 # 15%
            'format': 0.15                # 15%
        }
        
        # Standards
        self.min_width = 500
        self.recommended_width = 1000
        self.min_height = 500
        self.recommended_height = 1000
        self.min_dpi = 72
        self.recommended_dpi = 150
        self.min_blur_variance = 100
        self.recommended_blur_variance = 500
        self.recommended_formats = ['JPEG', 'PNG', 'WEBP', 'JPG']
        self.max_file_size_mb = 5
    
    def _convert_to_json_serializable(self, obj):
        """Convert numpy types to native Python types for JSON serialization"""
        if isinstance(obj, np.integer):
            return int(obj)
        elif isinstance(obj, np.floating):
            return float(obj)
        elif isinstance(obj, np.ndarray):
            return obj.tolist()
        elif isinstance(obj, dict):
            return {key: self._convert_to_json_serializable(value) for key, value in obj.items()}
        elif isinstance(obj, (list, tuple)):
            return [self._convert_to_json_serializable(item) for item in obj]
        return obj
    
    def _empty_result(self, issue: str, suggestion: str) -> Dict:
        """Build the zero-score result returned when no image could be scored."""
        return {
            'image_score': 0.0,
            'breakdown': {},
            'issues': [issue],
            'suggestions': [suggestion],
            'image_metadata': {},
            # Present on every result so callers see one consistent schema
            'ai_improvements': None
        }

    def score_image(self, product: Dict, image_data: bytes = None, image_path: str = None) -> Dict:
        """
        Main scoring function for product images

        Loads the image (bytes take precedence over a path), converts it to
        RGB, runs the five component checks and combines their 0-100 scores
        using ``self.image_weights``.

        Args:
            product: Product dictionary with metadata ('sku' is read for logging)
            image_data: Raw image bytes (optional)
            image_path: Path to image file (optional)

        Returns:
            Dictionary with keys 'image_score', 'breakdown', 'issues',
            'suggestions', 'image_metadata' and 'ai_improvements'. When the
            image is missing or processing fails, a zero-score result with a
            single issue and suggestion is returned instead of raising.
        """
        logger.info(f"[IMAGE SCORER] Starting image scoring for SKU: {product.get('sku')}")

        try:
            # Load image. The context manager releases the source image (and
            # its file handle, for paths) once the RGB copy has been made.
            if image_data:
                logger.info("[IMAGE SCORER] Loading image from bytes")
                with Image.open(io.BytesIO(image_data)) as raw_image:
                    image = raw_image.convert("RGB")
            elif image_path:
                logger.info(f"[IMAGE SCORER] Loading image from path: {image_path}")
                if not os.path.exists(image_path):
                    logger.error(f"[IMAGE SCORER] File not found: {image_path}")
                    return self._empty_result(
                        f'Image file not found: {image_path}',
                        'Verify image file exists at the specified path'
                    )
                with Image.open(image_path) as raw_image:
                    image = raw_image.convert("RGB")
                logger.info(f"[IMAGE SCORER] Image loaded successfully: {image.size}")
            else:
                logger.warning("[IMAGE SCORER] No image provided")
                return self._empty_result('No image provided', 'Upload a product image')

            image_np = np.array(image)
            logger.info(f"[IMAGE SCORER] Image converted to numpy array: {image_np.shape}")

            # Extract metadata
            metadata = self._extract_metadata(image, image_data or image_path)
            logger.info(f"[IMAGE SCORER] Metadata extracted: {metadata}")

            # Component checks in reporting order. Each returns
            # (score, issues, suggestions); the background check returns an
            # extra info dict that is merged into the reported metadata.
            checks = (
                ('resolution', lambda: self._check_resolution(image, metadata)),
                ('clarity', lambda: self._check_clarity(image_np)),
                ('background', lambda: self._check_background(image_np)),
                ('size', lambda: self._check_size(image, metadata)),
                ('format', lambda: self._check_format(image, metadata)),
            )

            scores = {}
            issues = []
            suggestions = []
            extra_metadata = {}

            for name, run_check in checks:
                logger.info(f"[IMAGE SCORER] Checking {name}...")
                score, check_issues, check_suggestions, *extras = run_check()
                scores[name] = score
                issues.extend(check_issues)
                suggestions.extend(check_suggestions)
                for extra in extras:
                    extra_metadata.update(extra)
                logger.info(f"[IMAGE SCORER] {name.capitalize()} score: {score}")

            # Calculate final score
            final_score = sum(scores[key] * self.image_weights[key] for key in scores)
            logger.info(f"[IMAGE SCORER] ✓ Final image score: {final_score}")

            # Convert all numpy types to native Python types for JSON serialization
            return {
                'image_score': round(float(final_score), 2),
                'breakdown': {k: round(float(v), 2) for k, v in scores.items()},
                'issues': issues,
                'suggestions': suggestions,
                'image_metadata': self._convert_to_json_serializable({**metadata, **extra_metadata}),
                'ai_improvements': self._get_ai_improvements(product, scores, issues) if self.use_ai else None
            }

        except Exception as e:
            # Boundary handler: scoring must never raise to the caller
            logger.error(f"[IMAGE SCORER] ✗ Image scoring error: {e}", exc_info=True)
            return self._empty_result(
                f"Image processing failed: {str(e)}",
                'Ensure image is valid and accessible'
            )
    
    def _extract_metadata(self, image: Image.Image, source) -> Dict:
        """
        Extract image metadata with safe handling of None values

        Args:
            image: The loaded PIL image. score_image passes the result of
                ``convert("RGB")``, whose ``format`` is None, so the format
                is re-read from ``source`` when it is missing here.
            source: The original image bytes or file path; used for format
                detection and file size.

        Returns:
            Dict with 'width', 'height', 'dpi' (2-tuple, ``(None, None)``
            when unknown), 'format' (string, 'UNKNOWN' when undetectable),
            'mode' and 'file_size_mb' (None when unknown).
        """
        logger.info("[IMAGE SCORER] Extracting metadata...")

        width, height = image.size
        logger.info(f"[IMAGE SCORER] Image dimensions: {width}x{height}")

        is_raw_bytes = isinstance(source, (bytes, bytearray))

        # Get format - handle None case
        img_format = getattr(image, 'format', None)

        if img_format is None and (is_raw_bytes or isinstance(source, str)):
            # Re-open the original source to read its format from the header
            try:
                probe_source = io.BytesIO(source) if is_raw_bytes else source
                with Image.open(probe_source) as probe:
                    img_format = probe.format
            except Exception as e:
                logger.warning(f"[IMAGE SCORER] Could not read format from source: {e}")

        if img_format is None:
            # Try to detect from file extension
            if isinstance(source, str):
                ext = os.path.splitext(source)[1].upper().lstrip('.')
                img_format = ext if ext else 'UNKNOWN'
                logger.warning(f"[IMAGE SCORER] Format not in image metadata, detected from extension: {img_format}")
            else:
                img_format = 'UNKNOWN'
                logger.warning("[IMAGE SCORER] Format is None and cannot detect from source")

        logger.info(f"[IMAGE SCORER] Image format: {img_format}")

        # Get DPI
        dpi = image.info.get('dpi', (None, None))
        if not dpi or dpi == (None, None):
            # Try EXIF (piexif is optional; any failure means "unknown")
            try:
                import piexif
                exif_data = piexif.load(image.info.get('exif', b''))
                x_res = exif_data['0th'].get(piexif.ImageIFD.XResolution, None)
                y_res = exif_data['0th'].get(piexif.ImageIFD.YResolution, None)
                if x_res and y_res:
                    # Resolution values are (numerator, denominator) pairs
                    dpi = (int(x_res[0] / x_res[1]), int(y_res[0] / y_res[1]))
                else:
                    dpi = (None, None)
            except Exception:
                dpi = (None, None)

        logger.info(f"[IMAGE SCORER] DPI: {dpi}")

        # Get file size
        file_size_mb = None
        if is_raw_bytes:
            file_size_mb = len(source) / (1024 * 1024)
        elif isinstance(source, str) and os.path.exists(source):
            file_size_mb = os.path.getsize(source) / (1024 * 1024)

        if file_size_mb is not None:
            logger.info(f"[IMAGE SCORER] File size: {file_size_mb:.2f} MB")
        else:
            logger.info("[IMAGE SCORER] File size: Unknown")

        return {
            'width': int(width),  # Ensure native Python int
            'height': int(height),  # Ensure native Python int
            'dpi': dpi,
            'format': str(img_format),  # Ensure string
            'mode': str(image.mode),
            'file_size_mb': round(float(file_size_mb), 2) if file_size_mb is not None else None
        }
    
    def _check_resolution(self, image: Image.Image, metadata: Dict) -> Tuple[float, List[str], List[str]]:
        """Check image resolution (DPI)"""
        issues = []
        suggestions = []
        
        dpi = metadata.get('dpi', (None, None))
        
        if not dpi or dpi == (None, None) or dpi[0] is None:
            suggestions.append("DPI information not available in image, ensure high-quality source")
            score = 70.0
        else:
            avg_dpi = (dpi[0] + dpi[1]) / 2 if dpi[1] else dpi[0]
            
            if avg_dpi < self.min_dpi:
                issues.append(f"Image: Low resolution ({avg_dpi} DPI, minimum {self.min_dpi})")
                suggestions.append(f"Use images with at least {self.recommended_dpi} DPI")
                score = (avg_dpi / self.min_dpi) * 50
            elif avg_dpi < self.recommended_dpi:
                suggestions.append(f"Resolution acceptable but could be better (current: {avg_dpi} DPI)")
                score = 50 + ((avg_dpi - self.min_dpi) / (self.recommended_dpi - self.min_dpi)) * 50
            else:
                score = 100.0
        
        return float(score), issues, suggestions
    
    def _check_clarity(self, image_np: np.ndarray) -> Tuple[float, List[str], List[str]]:
        """
        Check image clarity using Laplacian variance (blur detection)

        Args:
            image_np: Image array that is converted from RGB to grayscale
                before the Laplacian is applied.

        Returns:
            (score, issues, suggestions). Variance below
            ``self.min_blur_variance`` scales within 0-50 and raises an
            issue; below ``self.recommended_blur_variance`` scales within
            50-100; otherwise 100. If the analysis fails, the score is 70
            with an explanatory suggestion.
        """
        issues = []
        suggestions = []

        try:
            gray = cv2.cvtColor(image_np, cv2.COLOR_RGB2GRAY)
            # Convert to native Python float so the value is JSON-safe
            blur_variance = float(cv2.Laplacian(gray, cv2.CV_64F).var())

            if blur_variance < self.min_blur_variance:
                issues.append(f"Image: Blurry/low clarity (variance: {blur_variance:.2f})")
                # Quote the configured threshold so the advice stays correct
                # if recommended_blur_variance is changed.
                suggestions.append(
                    f"Use sharp, well-focused images (variance should be > {self.recommended_blur_variance})"
                )
                score = (blur_variance / self.min_blur_variance) * 50
            elif blur_variance < self.recommended_blur_variance:
                suggestions.append(f"Image clarity acceptable but could be sharper (variance: {blur_variance:.2f})")
                score = 50 + ((blur_variance - self.min_blur_variance) / (self.recommended_blur_variance - self.min_blur_variance)) * 50
            else:
                score = 100.0

        except Exception as e:
            # Best-effort check: fall back to a neutral score
            logger.warning(f"Blur detection error: {e}")
            score = 70.0
            suggestions.append("Unable to assess image clarity")

        return float(score), issues, suggestions
    
    def _check_background(self, image_np: np.ndarray) -> Tuple[float, List[str], List[str], Dict]:
        """
        Estimate the background from the largest of three k-means colour
        clusters and score its colour and its share of the image.

        Args:
            image_np: Image array; reshaped to rows of three channel values.

        Returns:
            (score, issues, suggestions, bg_info). bg_info holds the
            dominant colour (RGB list, hex, closest name) and its coverage
            percentage; it stays empty if the analysis fails, in which case
            the score is 70.
        """
        found_issues = []
        advice = []
        details = {}

        try:
            flat_pixels = image_np.reshape(-1, 3)
            model = KMeans(n_clusters=3, random_state=0, n_init=10).fit(flat_pixels)
            labels = model.labels_

            # The most populated cluster is treated as the background
            top_cluster = np.argmax(np.bincount(labels))
            top_rgb = tuple(int(channel) for channel in model.cluster_centers_[top_cluster].astype(int))

            name = self._closest_color_name(top_rgb)
            hex_value = webcolors.rgb_to_hex(top_rgb)

            # Share of pixels assigned to that cluster, in percent
            in_cluster = int(np.sum(labels == top_cluster))
            pixel_count = int(len(labels))
            coverage = float(100 * in_cluster / pixel_count)

            details = {
                'dominant_color_rgb': list(top_rgb),  # list rather than tuple for JSON
                'dominant_color_hex': str(hex_value),
                'dominant_color_name': str(name),
                'background_coverage': round(coverage, 2)
            }

            # Colour component: white-ish names are preferred for e-commerce.
            # NOTE(review): the light-grey names below all have a channel
            # mean above 200, so the brightness test probably shadows that
            # branch in practice - confirm the intended order.
            lowered = name.lower()
            if lowered in ['white', 'whitesmoke', 'snow', 'ivory', 'linen']:
                color_score = 100.0
            elif sum(top_rgb) / 3 > 200:
                color_score = 85.0
            elif lowered in ['lightgray', 'lightgrey', 'gainsboro']:
                color_score = 75.0
                advice.append("Consider using pure white background for better product visibility")
            else:
                found_issues.append(f"Image: Non-white background ({name})")
                advice.append("Use white or light neutral background for e-commerce standards")
                color_score = 50.0

            # Coverage component: the background should dominate the frame
            if coverage > 60:
                coverage_score = 100.0
            elif coverage > 40:
                coverage_score = 80.0
            else:
                advice.append(f"Background coverage low ({coverage:.1f}%), product may be too small")
                coverage_score = 60.0

            overall = float(np.mean([color_score, coverage_score]))

        except Exception as e:
            # Best-effort check: fall back to a neutral score
            logger.warning(f"Background analysis error: {e}")
            overall = 70.0
            advice.append("Unable to analyze background")

        return overall, found_issues, advice, details
    
    def _check_size(self, image: Image.Image, metadata: Dict) -> Tuple[float, List[str], List[str]]:
        """Check image dimensions"""
        issues = []
        suggestions = []
        
        width = metadata['width']
        height = metadata['height']
        
        score_components = []
        
        # Width check
        if width < self.min_width:
            issues.append(f"Image: Width too small ({width}px, minimum {self.min_width}px)")
            suggestions.append(f"Use images at least {self.recommended_width}x{self.recommended_height}px")
            score_components.append((width / self.min_width) * 50)
        elif width < self.recommended_width:
            suggestions.append(f"Image width acceptable but could be larger (current: {width}px)")
            score_components.append(50 + ((width - self.min_width) / (self.recommended_width - self.min_width)) * 50)
        else:
            score_components.append(100.0)
        
        # Height check
        if height < self.min_height:
            issues.append(f"Image: Height too small ({height}px, minimum {self.min_height}px)")
            score_components.append((height / self.min_height) * 50)
        elif height < self.recommended_height:
            score_components.append(50 + ((height - self.min_height) / (self.recommended_height - self.min_height)) * 50)
        else:
            score_components.append(100.0)
        
        # Aspect ratio check (should be roughly square or standard format)
        aspect_ratio = width / height
        if 0.75 <= aspect_ratio <= 1.33:  # 4:3 to 3:4 range
            score_components.append(100.0)
        else:
            suggestions.append(f"Image aspect ratio unusual ({aspect_ratio:.2f}), consider standard format")
            score_components.append(80.0)
        
        final_score = float(np.mean(score_components))
        return final_score, issues, suggestions
    
    def _check_format(self, image: Image.Image, metadata: Dict) -> Tuple[float, List[str], List[str]]:
        """Check image format and file size - FIXED to handle None"""
        issues = []
        suggestions = []
        score_components = []
        
        # Format check - FIXED: safe handling of None
        img_format = metadata.get('format')
        if img_format is None or img_format == 'UNKNOWN':
            logger.warning("[IMAGE SCORER] Image format is None/Unknown")
            suggestions.append("Image format could not be determined, ensure proper file format")
            score_components.append(70.0)
        else:
            img_format_upper = str(img_format).upper()  # Ensure string and uppercase
            
            if img_format_upper in self.recommended_formats:
                score_components.append(100.0)
            elif img_format_upper in ['JPG', 'JPEG']:  # JPG vs JPEG
                score_components.append(100.0)
            elif img_format_upper in ['GIF', 'BMP', 'TIFF']:
                suggestions.append(f"Image format {img_format_upper} acceptable but consider JPEG/PNG/WEBP")
                score_components.append(75.0)
            else:
                issues.append(f"Image: Uncommon format ({img_format_upper})")
                suggestions.append("Use standard formats: JPEG, PNG, or WEBP")
                score_components.append(50.0)
        
        # File size check
        file_size_mb = metadata.get('file_size_mb')
        if file_size_mb:
            if file_size_mb <= self.max_file_size_mb:
                score_components.append(100.0)
            elif file_size_mb <= self.max_file_size_mb * 1.5:
                suggestions.append(f"Image file size large ({file_size_mb:.2f}MB), consider optimization")
                score_components.append(80.0)
            else:
                issues.append(f"Image: File size too large ({file_size_mb:.2f}MB, max {self.max_file_size_mb}MB)")
                suggestions.append("Compress image to reduce file size")
                score_components.append(50.0)
        else:
            score_components.append(85.0)  # Default if size unknown
        
        final_score = float(np.mean(score_components))
        return final_score, issues, suggestions
    
    def _closest_color_name(self, rgb_color: tuple) -> str:
        """
        Return the colour name known to ``webcolors`` whose RGB value has
        the smallest squared distance to ``rgb_color``.

        Args:
            rgb_color: Sequence whose first three items are the R, G and B
                values to match.

        Returns:
            The closest name found; 'unknown' if none was evaluated. If the
            lookup fails part-way, the best match so far is returned.
        """
        best_name = 'unknown'
        best_distance = float('inf')

        try:
            for candidate in webcolors.names():
                red, green, blue = webcolors.name_to_rgb(candidate)
                squared_distance = (
                    (red - rgb_color[0]) ** 2
                    + (green - rgb_color[1]) ** 2
                    + (blue - rgb_color[2]) ** 2
                )
                # Strict comparison keeps the first name on ties
                if squared_distance < best_distance:
                    best_distance, best_name = squared_distance, candidate
        except Exception as e:
            logger.warning(f"Color name detection error: {e}")

        return best_name
    
    def _get_ai_improvements(self, product: Dict, scores: Dict, issues: List[str]) -> Dict:
        """Use Gemini AI to suggest image improvements"""
        if not self.use_ai or not self.ai_service:
            return None
        
        try:
            if not issues:
                return {"note": "No improvements needed"}
            
            prompt = f"""Analyze this product image quality report and suggest improvements.

PRODUCT: {product.get('title', 'Unknown')}
CATEGORY: {product.get('category', 'Unknown')}

SCORES:
{chr(10).join(f"• {k}: {v:.1f}/100" for k, v in scores.items())}

ISSUES:
{chr(10).join(f"• {issue}" for issue in issues[:10])}

Return ONLY this JSON:
{{
  "priority_fixes": ["fix1", "fix2", "fix3"],
  "recommended_specs": {{"width": 1200, "height": 1200, "format": "JPEG", "background": "white"}},
  "improvement_notes": ["note1", "note2"],
  "confidence": "high/medium/low"
}}"""

            response = self.ai_service._call_gemini_api(prompt, max_tokens=1024)
            
            if response and response.candidates:
                return self.ai_service._parse_response(response.text)
            
            return {"error": "No AI response"}
            
        except Exception as e:
            logger.error(f"AI improvement error: {e}")
            return {"error": str(e)}