|
|
@@ -592,13 +592,622 @@ import pandas as pd
|
|
|
from .models import Product, ProductAttributeValue
|
|
|
|
|
|
|
|
|
-class ProductUploadExcelView(APIView):
|
|
|
+# class ProductUploadExcelView(APIView):
|
|
|
+# """
|
|
|
+# POST API to upload an Excel file with two sheets:
|
|
|
+# 1. 'Products' sheet - Product details
|
|
|
+# 2. 'Attribute_values' sheet - Original attribute values
|
|
|
+
|
|
|
+# Creates/updates both products and their attribute values in a single transaction.
|
|
|
+# """
|
|
|
+# parser_classes = (MultiPartParser, FormParser)
|
|
|
+
|
|
|
+# def post(self, request, *args, **kwargs):
|
|
|
+# file_obj = request.FILES.get('file')
|
|
|
+# if not file_obj:
|
|
|
+# return Response({'error': 'No file provided'}, status=status.HTTP_400_BAD_REQUEST)
|
|
|
+
|
|
|
+# try:
|
|
|
+# # Read all sheets from Excel file
|
|
|
+# excel_file = pd.ExcelFile(file_obj)
|
|
|
+
|
|
|
+# # Check if required sheets exist
|
|
|
+# if 'Products' not in excel_file.sheet_names:
|
|
|
+# return Response({
|
|
|
+# 'error': "Missing 'Products' sheet",
|
|
|
+# 'available_sheets': excel_file.sheet_names
|
|
|
+# }, status=status.HTTP_400_BAD_REQUEST)
|
|
|
+
|
|
|
+# # Read Products sheet
|
|
|
+# df_products = pd.read_excel(excel_file, sheet_name='Products')
|
|
|
+# df_products.columns = [c.strip().lower().replace(' ', '_') for c in df_products.columns]
|
|
|
+
|
|
|
+# # Check required columns for Products
|
|
|
+# expected_product_cols = {
|
|
|
+# 'item_id',
|
|
|
+# 'product_name',
|
|
|
+# 'product_long_description',
|
|
|
+# 'product_short_description',
|
|
|
+# 'product_type',
|
|
|
+# 'image_path'
|
|
|
+# }
|
|
|
+
|
|
|
+# if not expected_product_cols.issubset(df_products.columns):
|
|
|
+# return Response({
|
|
|
+# 'error': 'Missing required columns in Products sheet',
|
|
|
+# 'required_columns': list(expected_product_cols),
|
|
|
+# 'found_columns': list(df_products.columns)
|
|
|
+# }, status=status.HTTP_400_BAD_REQUEST)
|
|
|
+
|
|
|
+# # Read Attribute_values sheet if it exists
|
|
|
+# df_attributes = None
|
|
|
+# has_attributes_sheet = 'Attribute_values' in excel_file.sheet_names
|
|
|
+
|
|
|
+# if has_attributes_sheet:
|
|
|
+# df_attributes = pd.read_excel(excel_file, sheet_name='Attribute_values')
|
|
|
+# df_attributes.columns = [c.strip().lower().replace(' ', '_') for c in df_attributes.columns]
|
|
|
+
|
|
|
+# # Check required columns for Attribute_values
|
|
|
+# expected_attr_cols = {'item_id', 'attribute_name', 'original_value'}
|
|
|
+# if not expected_attr_cols.issubset(df_attributes.columns):
|
|
|
+# return Response({
|
|
|
+# 'error': 'Missing required columns in Attribute_values sheet',
|
|
|
+# 'required_columns': list(expected_attr_cols),
|
|
|
+# 'found_columns': list(df_attributes.columns)
|
|
|
+# }, status=status.HTTP_400_BAD_REQUEST)
|
|
|
+
|
|
|
+# # Initialize counters
|
|
|
+# products_created = 0
|
|
|
+# products_updated = 0
|
|
|
+# attributes_created = 0
|
|
|
+# attributes_updated = 0
|
|
|
+# products_failed = 0
|
|
|
+# attributes_failed = 0
|
|
|
+# errors = []
|
|
|
+
|
|
|
+# # Use transaction to ensure atomicity
|
|
|
+# with transaction.atomic():
|
|
|
+# # Process Products sheet
|
|
|
+# for idx, row in df_products.iterrows():
|
|
|
+# item_id = str(row.get('item_id', '')).strip()
|
|
|
+# if not item_id:
|
|
|
+# products_failed += 1
|
|
|
+# errors.append(f"Products Row {idx + 2}: Missing item_id")
|
|
|
+# continue
|
|
|
+
|
|
|
+# try:
|
|
|
+# defaults = {
|
|
|
+# 'product_name': str(row.get('product_name', '')),
|
|
|
+# 'product_long_description': str(row.get('product_long_description', '')),
|
|
|
+# 'product_short_description': str(row.get('product_short_description', '')),
|
|
|
+# 'product_type': str(row.get('product_type', '')),
|
|
|
+# 'image_path': str(row.get('image_path', '')),
|
|
|
+# }
|
|
|
+
|
|
|
+# obj, created = Product.objects.update_or_create(
|
|
|
+# item_id=item_id,
|
|
|
+# defaults=defaults
|
|
|
+# )
|
|
|
+
|
|
|
+# if created:
|
|
|
+# products_created += 1
|
|
|
+# else:
|
|
|
+# products_updated += 1
|
|
|
+# except Exception as e:
|
|
|
+# products_failed += 1
|
|
|
+# errors.append(f"Products Row {idx + 2} (item_id: {item_id}): {str(e)}")
|
|
|
+
|
|
|
+# # Process Attribute_values sheet if it exists
|
|
|
+# if has_attributes_sheet and df_attributes is not None:
|
|
|
+# # Group by item_id to optimize lookups
|
|
|
+# item_ids_in_attrs = df_attributes['item_id'].unique()
|
|
|
+
|
|
|
+# # Fetch all products at once
|
|
|
+# existing_products = {
|
|
|
+# p.item_id: p
|
|
|
+# for p in Product.objects.filter(item_id__in=item_ids_in_attrs)
|
|
|
+# }
|
|
|
+
|
|
|
+# for idx, row in df_attributes.iterrows():
|
|
|
+# item_id = str(row.get('item_id', '')).strip()
|
|
|
+# attribute_name = str(row.get('attribute_name', '')).strip()
|
|
|
+# original_value = str(row.get('original_value', '')).strip()
|
|
|
+
|
|
|
+# if not item_id or not attribute_name:
|
|
|
+# attributes_failed += 1
|
|
|
+# errors.append(
|
|
|
+# f"Attribute_values Row {idx + 2}: Missing item_id or attribute_name"
|
|
|
+# )
|
|
|
+# continue
|
|
|
+
|
|
|
+# # Check if product exists
|
|
|
+# product = existing_products.get(item_id)
|
|
|
+# if not product:
|
|
|
+# attributes_failed += 1
|
|
|
+# errors.append(
|
|
|
+# f"Attribute_values Row {idx + 2}: Product with item_id '{item_id}' not found. "
|
|
|
+# "Make sure it exists in Products sheet."
|
|
|
+# )
|
|
|
+# continue
|
|
|
+
|
|
|
+# try:
|
|
|
+# attr_obj, created = ProductAttributeValue.objects.update_or_create(
|
|
|
+# product=product,
|
|
|
+# attribute_name=attribute_name,
|
|
|
+# defaults={'original_value': original_value}
|
|
|
+# )
|
|
|
+
|
|
|
+# if created:
|
|
|
+# attributes_created += 1
|
|
|
+# else:
|
|
|
+# attributes_updated += 1
|
|
|
+# except Exception as e:
|
|
|
+# attributes_failed += 1
|
|
|
+# errors.append(
|
|
|
+# f"Attribute_values Row {idx + 2} "
|
|
|
+# f"(item_id: {item_id}, attribute: {attribute_name}): {str(e)}"
|
|
|
+# )
|
|
|
+
|
|
|
+# # Prepare response
|
|
|
+# response_data = {
|
|
|
+# 'message': 'Upload completed successfully',
|
|
|
+# 'products': {
|
|
|
+# 'created': products_created,
|
|
|
+# 'updated': products_updated,
|
|
|
+# 'failed': products_failed,
|
|
|
+# 'total_processed': products_created + products_updated + products_failed
|
|
|
+# }
|
|
|
+# }
|
|
|
+
|
|
|
+# if has_attributes_sheet:
|
|
|
+# response_data['attribute_values'] = {
|
|
|
+# 'created': attributes_created,
|
|
|
+# 'updated': attributes_updated,
|
|
|
+# 'failed': attributes_failed,
|
|
|
+# 'total_processed': attributes_created + attributes_updated + attributes_failed
|
|
|
+# }
|
|
|
+# else:
|
|
|
+# response_data['attribute_values'] = {
|
|
|
+# 'message': 'Attribute_values sheet not found in Excel file'
|
|
|
+# }
|
|
|
+
|
|
|
+# if errors:
|
|
|
+# response_data['errors'] = errors[:50] # Limit to first 50 errors
|
|
|
+# if len(errors) > 50:
|
|
|
+# response_data['errors'].append(f"... and {len(errors) - 50} more errors")
|
|
|
+
|
|
|
+# # Determine status code
|
|
|
+# if products_failed > 0 or attributes_failed > 0:
|
|
|
+# status_code = status.HTTP_207_MULTI_STATUS
|
|
|
+# else:
|
|
|
+# status_code = status.HTTP_201_CREATED
|
|
|
+
|
|
|
+# return Response(response_data, status=status_code)
|
|
|
+
|
|
|
+# except pd.errors.EmptyDataError:
|
|
|
+# return Response({
|
|
|
+# 'error': 'The uploaded Excel file is empty or invalid'
|
|
|
+# }, status=status.HTTP_400_BAD_REQUEST)
|
|
|
+# except Exception as e:
|
|
|
+# return Response({
|
|
|
+# 'error': f'An error occurred while processing the file: {str(e)}'
|
|
|
+# }, status=status.HTTP_500_INTERNAL_SERVER_ERROR)
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+import logging
|
|
|
+import json
|
|
|
+from rest_framework.views import APIView
|
|
|
+from rest_framework.response import Response
|
|
|
+from rest_framework import status
|
|
|
+from rest_framework.parsers import MultiPartParser, FormParser
|
|
|
+from django.db import transaction
|
|
|
+from django.db.models import Prefetch, F
|
|
|
+import pandas as pd
|
|
|
+# Import all models used by the upload view and the background generation flow below
|
|
|
+from .models import Product, ProductAttributeValue, ProductType, ProductAttribute, AttributePossibleValue
|
|
|
+from .services import ProductAttributeService
|
|
|
+from .ocr_service import OCRService
|
|
|
+from .visual_processing_service import VisualProcessingService
|
|
|
+from openpyxl import Workbook
|
|
|
+from openpyxl.styles import Font, PatternFill, Alignment
|
|
|
+from django.conf import settings
|
|
|
+import os
|
|
|
+import threading
|
|
|
+from datetime import datetime
|
|
|
+
|
|
|
+# --- Logging Setup ---
|
|
|
+# Define log and status file paths in MEDIA_ROOT
|
|
|
+LOG_FILE_PATH = os.path.join(settings.MEDIA_ROOT, 'excel_generation.log')
|
|
|
+STATUS_FILE_PATH = os.path.join(settings.MEDIA_ROOT, 'excel_generation_status.json')
|
|
|
+
|
|
|
+# Ensure MEDIA_ROOT exists so the log, status, and generated Excel files can be saved
|
|
|
+if not os.path.exists(settings.MEDIA_ROOT):
|
|
|
+ os.makedirs(settings.MEDIA_ROOT)
|
|
|
+
|
|
|
+# Configure basic logging
|
|
|
+logging.basicConfig(
|
|
|
+ filename=LOG_FILE_PATH,
|
|
|
+ level=logging.INFO,
|
|
|
+ format='%(asctime)s - %(levelname)s - %(message)s'
|
|
|
+)
|
|
|
+logger = logging.getLogger(__name__)
|
|
|
+
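Worth noting: logging.basicConfig() is a no-op when the root logger already has handlers (for example via the project's LOGGING setting), in which case excel_generation.log would never receive records. A minimal module-scoped alternative, sketched here with standard-library calls only (the handler variable name is illustrative):

import logging

# Attach a dedicated file handler to this module's logger instead of relying on basicConfig.
file_handler = logging.FileHandler(LOG_FILE_PATH)  # same path as defined above
file_handler.setFormatter(logging.Formatter('%(asctime)s - %(levelname)s - %(message)s'))

logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)
logger.addHandler(file_handler)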
|
|
|
+# -------------------------------------------------------------------------------------------------
|
|
|
+
|
|
|
+def generate_product_excel_background():
|
|
|
+ """
|
|
|
+ Function to perform batch attribute extraction for all products and generate an Excel file.
|
|
|
+ Runs in a background thread to avoid blocking the API response.
|
|
|
+ Logs success/failure and saves a status file for external monitoring.
|
|
|
"""
|
|
|
- POST API to upload an Excel file with two sheets:
|
|
|
- 1. 'Products' sheet - Product details
|
|
|
- 2. 'Attribute_values' sheet - Original attribute values
|
|
|
+ logger.info("Starting background product Excel generation and attribute extraction.")
|
|
|
|
|
|
- Creates/updates both products and their attribute values in a single transaction.
|
|
|
+ successful = 0
|
|
|
+ failed = 0
|
|
|
+ results = [] # To store detailed extraction results for Excel sheet 2
|
|
|
+
|
|
|
+ # Function to write status file (SUCCESS/FAILED)
|
|
|
+ def write_status(status_type, error_msg=None):
|
|
|
+ status_data = {
|
|
|
+ "status": status_type,
|
|
|
+ "timestamp": datetime.now().isoformat(),
|
|
|
+ "products_processed": successful + failed,
|
|
|
+ "products_successful": successful,
|
|
|
+ "products_failed": failed,
|
|
|
+ "excel_path": os.path.join(settings.MEDIA_URL, 'generated_products.xlsx') if status_type == "SUCCESS" else None,
|
|
|
+ "log_path": os.path.join(settings.MEDIA_URL, 'excel_generation.log'),
|
|
|
+ "error_message": error_msg
|
|
|
+ }
|
|
|
+ try:
|
|
|
+ with open(STATUS_FILE_PATH, 'w') as f:
|
|
|
+ json.dump(status_data, f, indent=4)
|
|
|
+ except Exception as e:
|
|
|
+ logger.exception(f"CRITICAL ERROR: Failed to write status file at {STATUS_FILE_PATH}: {e}")
|
|
|
+
|
|
|
+ try:
|
|
|
+ # 1. PREFETCH all necessary related data to minimize database queries
|
|
|
+
|
|
|
+ # Prefetch possible values for mandatory attributes
|
|
|
+ possible_values_prefetch = Prefetch(
|
|
|
+ 'attributes',
|
|
|
+ queryset=ProductAttribute.objects.filter(is_mandatory=True).prefetch_related('possible_values')
|
|
|
+ )
|
|
|
+
|
|
|
+ # Fetch all ProductTypes with their mandatory attributes and possible values
|
|
|
+ all_product_types = ProductType.objects.prefetch_related(possible_values_prefetch)
|
|
|
+ product_type_map = {
|
|
|
+ pt.name: pt for pt in all_product_types
|
|
|
+ }
|
|
|
+
|
|
|
+ # Prepare product_list for batch extraction
|
|
|
+ all_products = Product.objects.all()
|
|
|
+ product_list = []
|
|
|
+
|
|
|
+ for p in all_products:
|
|
|
+ # mandatory_attrs will be the dictionary required by the service
|
|
|
+ mandatory_attrs_dict = {}
|
|
|
+ product_type_name = p.product_type.strip() if p.product_type else None
|
|
|
+
|
|
|
+ if product_type_name and product_type_name in product_type_map:
|
|
|
+ pt = product_type_map[product_type_name]
|
|
|
+
|
|
|
+ # Build the mandatory_attrs dictionary: { "Attribute Name": ["Value 1", "Value 2"], ... }
|
|
|
+ for attr in pt.attributes.all(): # .all() here works because we used Prefetch for 'attributes'
|
|
|
+ # attr.possible_values.all() works because we used prefetch_related('possible_values')
|
|
|
+ mandatory_attrs_dict[attr.name] = [
|
|
|
+ pv.value for pv in attr.possible_values.all()
|
|
|
+ ]
|
|
|
+
|
|
|
+ product_list.append({
|
|
|
+ "item_id": p.item_id,
|
|
|
+ "product_type_name": product_type_name,
|
|
|
+ "mandatory_attrs": mandatory_attrs_dict # <-- FIX: Pass the dictionary here
|
|
|
+ })
|
|
|
+
|
|
|
+ # Batch settings (using defaults)
|
|
|
+ model = "llama-3.1-8b-instant"
|
|
|
+ extract_additional = True
|
|
|
+ process_image = False
|
|
|
+ multiple = []
|
|
|
+ threshold_abs = 0.65
|
|
|
+ margin = 0.15
|
|
|
+ use_dynamic_thresholds = True
|
|
|
+ use_adaptive_margin = True
|
|
|
+ use_semantic_clustering = True
|
|
|
+
|
|
|
+ # Batch extraction logic
|
|
|
+ item_ids = [p['item_id'] for p in product_list]
|
|
|
+ products_queryset = Product.objects.filter(item_id__in=item_ids)
|
|
|
+ product_map = {product.item_id: product for product in products_queryset}
|
|
|
+ found_ids = set(product_map.keys())
|
|
|
+
|
|
|
+ for product_entry in product_list:
|
|
|
+ item_id = product_entry['item_id']
|
|
|
+ # mandatory_attrs is a dict (possibly empty) mapping attribute names to their possible values
|
|
|
+ mandatory_attrs = product_entry['mandatory_attrs']
|
|
|
+
|
|
|
+ if item_id not in found_ids:
|
|
|
+ failed += 1
|
|
|
+ results.append({
|
|
|
+ "product_id": item_id,
|
|
|
+ "error": "Product not found in database"
|
|
|
+ })
|
|
|
+ logger.warning(f"Product {item_id} not found in database. Skipping extraction.")
|
|
|
+ continue
|
|
|
+
|
|
|
+ product = product_map[item_id]
|
|
|
+
|
|
|
+ try:
|
|
|
+ title = product.product_name
|
|
|
+ short_desc = product.product_short_description
|
|
|
+ long_desc = product.product_long_description
|
|
|
+ image_url = product.image_path
|
|
|
+
|
|
|
+ ocr_results = None
|
|
|
+ ocr_text = None
|
|
|
+ visual_results = None
|
|
|
+
|
|
|
+ if process_image and image_url:
|
|
|
+ logger.info(f"Processing image for product {item_id}...")
|
|
|
+ # OCR Processing
|
|
|
+ ocr_service = OCRService()
|
|
|
+ ocr_results = ocr_service.process_image(image_url)
|
|
|
+
|
|
|
+ if ocr_results and ocr_results.get("detected_text"):
|
|
|
+ # NOTE: Assuming ProductAttributeService.extract_attributes_from_ocr exists
|
|
|
+ ocr_attrs = ProductAttributeService.extract_attributes_from_ocr(
|
|
|
+ ocr_results, model
|
|
|
+ )
|
|
|
+ ocr_results["extracted_attributes"] = ocr_attrs
|
|
|
+ ocr_text = "\n".join([
|
|
|
+ f"{item['text']} (confidence: {item['confidence']:.2f})"
|
|
|
+ for item in ocr_results["detected_text"]
|
|
|
+ ])
|
|
|
+
|
|
|
+ # Visual Processing
|
|
|
+ visual_service = VisualProcessingService()
|
|
|
+ product_type_hint = product.product_type if product.product_type else None
|
|
|
+ visual_results = visual_service.process_image(image_url, product_type_hint)
|
|
|
+
|
|
|
+ if visual_results and visual_results.get('visual_attributes'):
|
|
|
+ visual_results['visual_attributes'] = ProductAttributeService.format_visual_attributes(
|
|
|
+ visual_results['visual_attributes']
|
|
|
+ )
|
|
|
+ logger.info(f"Image processing done for product {item_id}.")
|
|
|
+
|
|
|
+
|
|
|
+ # Combine product text with source tracking
|
|
|
+ product_text, source_map = ProductAttributeService.combine_product_text(
|
|
|
+ title=title,
|
|
|
+ short_desc=short_desc,
|
|
|
+ long_desc=long_desc,
|
|
|
+ ocr_text=ocr_text
|
|
|
+ )
|
|
|
+
|
|
|
+ # Attribute Extraction with source tracking
|
|
|
+ extracted = ProductAttributeService.extract_attributes(
|
|
|
+ product_text=product_text,
|
|
|
+ mandatory_attrs=mandatory_attrs, # <-- This is now the dictionary with possible values
|
|
|
+ source_map=source_map,
|
|
|
+ model=model,
|
|
|
+ extract_additional=extract_additional,
|
|
|
+ multiple=multiple,
|
|
|
+ threshold_abs=threshold_abs,
|
|
|
+ margin=margin,
|
|
|
+ use_dynamic_thresholds=use_dynamic_thresholds,
|
|
|
+ use_adaptive_margin=use_adaptive_margin,
|
|
|
+ use_semantic_clustering=use_semantic_clustering
|
|
|
+ )
|
|
|
+
|
|
|
+ result = {
|
|
|
+ "product_id": item_id,
|
|
|
+ "mandatory": extracted.get("mandatory", {}),
|
|
|
+ "additional": extracted.get("additional", {}),
|
|
|
+ }
|
|
|
+
|
|
|
+ if ocr_results:
|
|
|
+ result["ocr_results"] = ocr_results
|
|
|
+
|
|
|
+ if visual_results:
|
|
|
+ result["visual_results"] = visual_results
|
|
|
+
|
|
|
+ results.append(result)
|
|
|
+ successful += 1
|
|
|
+ logger.info(f"Attribute extraction successful for product {item_id}.")
|
|
|
+
|
|
|
+ except Exception as e:
|
|
|
+ failed += 1
|
|
|
+ results.append({
|
|
|
+ "product_id": item_id,
|
|
|
+ "error": str(e)
|
|
|
+ })
|
|
|
+ # The earlier AttributeError ("'list' object has no attribute 'items'") is avoided by passing mandatory_attrs as a dict; the broad exception handler is kept as a safety net.
|
|
|
+ logger.exception(f"Error during attribute extraction for product {item_id}.")
|
|
|
+
|
|
|
+ logger.info(f"Batch extraction phase complete. Successful: {successful}, Failed: {failed}")
|
|
|
+
|
|
|
+ # --------------------------------------------------------------------------------
|
|
|
+ # Generate and save the Excel file
|
|
|
+ # --------------------------------------------------------------------------------
|
|
|
+ wb = Workbook()
|
|
|
+
|
|
|
+ # Sheet 1: Products (from DB)
|
|
|
+ ws_products = wb.active
|
|
|
+ ws_products.title = "Products"
|
|
|
+ products_headers = ['ITEM ID', 'PRODUCT NAME', 'PRODUCT TYPE', 'Product Short Description', 'Product Long Description', 'image_path']
|
|
|
+ header_fill = PatternFill(start_color="366092", end_color="366092", fill_type="solid")
|
|
|
+ header_font = Font(bold=True, color="FFFFFF")
|
|
|
+
|
|
|
+ for col_num, header in enumerate(products_headers, 1):
|
|
|
+ cell = ws_products.cell(row=1, column=col_num)
|
|
|
+ cell.value = header
|
|
|
+ cell.fill = header_fill
|
|
|
+ cell.font = header_font
|
|
|
+ cell.alignment = Alignment(horizontal="center", vertical="center")
|
|
|
+
|
|
|
+ all_products_db = Product.objects.all()
|
|
|
+ for row_num, product in enumerate(all_products_db, 2):
|
|
|
+ ws_products.cell(row=row_num, column=1, value=product.item_id)
|
|
|
+ ws_products.cell(row=row_num, column=2, value=product.product_name)
|
|
|
+ ws_products.cell(row=row_num, column=3, value=product.product_type)
|
|
|
+ ws_products.cell(row=row_num, column=4, value=product.product_short_description)
|
|
|
+ ws_products.cell(row=row_num, column=5, value=product.product_long_description)
|
|
|
+ ws_products.cell(row=row_num, column=6, value=product.image_path)
|
|
|
+
|
|
|
+ # Adjust column widths
|
|
|
+ for col_dim, width in zip(['A', 'B', 'C', 'D', 'E', 'F'], [15, 25, 15, 35, 50, 45]):
|
|
|
+ ws_products.column_dimensions[col_dim].width = width
|
|
|
+
|
|
|
+ # Sheet 2: Attribute_values (item_id, attribute_name, original_value, generated_value)
|
|
|
+ ws_attributes = wb.create_sheet("Attribute_values")
|
|
|
+ attributes_headers = ['item_id', 'attribute_name', 'original_value', 'generated_value']
|
|
|
+ for col_num, header in enumerate(attributes_headers, 1):
|
|
|
+ cell = ws_attributes.cell(row=1, column=col_num)
|
|
|
+ cell.value = header
|
|
|
+ cell.fill = header_fill
|
|
|
+ cell.font = header_font
|
|
|
+ cell.alignment = Alignment(horizontal="center", vertical="center")
|
|
|
+
|
|
|
+ # Fetch all original attributes
|
|
|
+ row_num = 2
|
|
|
+ all_original_attrs = ProductAttributeValue.objects.all()
|
|
|
+ # Create a lookup for original attributes by item_id and attribute_name
|
|
|
+ original_attrs_lookup = {
|
|
|
+ (attr.product.item_id, attr.attribute_name): attr.original_value
|
|
|
+ for attr in all_original_attrs
|
|
|
+ }
|
|
|
+
|
|
|
+ # Add attributes (original and generated)
|
|
|
+ processed_original_keys = set()
|
|
|
+ for res in results:
|
|
|
+ item_id = res["product_id"]
|
|
|
+
|
|
|
+ if "error" in res:
|
|
|
+ # Add existing original attributes for failed products to the sheet
|
|
|
+ for (orig_item_id, orig_attr_name), orig_value in original_attrs_lookup.items():
|
|
|
+ if orig_item_id == item_id:
|
|
|
+ ws_attributes.cell(row=row_num, column=1, value=orig_item_id)
|
|
|
+ ws_attributes.cell(row=row_num, column=2, value=orig_attr_name)
|
|
|
+ ws_attributes.cell(row=row_num, column=3, value=orig_value)
|
|
|
+ ws_attributes.cell(row=row_num, column=4, value=f"Extraction Failed: {res['error']}")
|
|
|
+ processed_original_keys.add((orig_item_id, orig_attr_name))
|
|
|
+ row_num += 1
|
|
|
+ continue
|
|
|
+
|
|
|
+ # Combine all generated attributes (mandatory, additional, OCR, visual)
|
|
|
+ generated_attrs = {}
|
|
|
+ for cat in ["mandatory", "additional"]:
|
|
|
+ attrs = res.get(cat, {})
|
|
|
+ for attr_name, values in attrs.items():
|
|
|
+ for val in values:
|
|
|
+ key = (item_id, attr_name)
|
|
|
+ if key not in generated_attrs:
|
|
|
+ generated_attrs[key] = []
|
|
|
+ generated_attrs[key].append(f"{val['value']} (source: {val['source']})")
|
|
|
+
|
|
|
+ # OCR extracted
|
|
|
+ ocr = res.get("ocr_results")
|
|
|
+ if ocr and "extracted_attributes" in ocr and isinstance(ocr["extracted_attributes"], dict):
|
|
|
+ for attr_name, values in ocr["extracted_attributes"].items():
|
|
|
+ for val in values:
|
|
|
+ key = (item_id, attr_name)
|
|
|
+ if key not in generated_attrs:
|
|
|
+ generated_attrs[key] = []
|
|
|
+ generated_attrs[key].append(f"{val['value']} (source: {val['source']})")
|
|
|
+
|
|
|
+ # Visual extracted
|
|
|
+ visual = res.get("visual_results")
|
|
|
+ if visual and "visual_attributes" in visual:
|
|
|
+ vis_attrs = visual["visual_attributes"]
|
|
|
+ if isinstance(vis_attrs, dict):
|
|
|
+ # Handle dict format where value might be a list of dicts or a single value
|
|
|
+ for attr_name, values in vis_attrs.items():
|
|
|
+ if not isinstance(values, list):
|
|
|
+ values = [{"value": values, "source": "visual"}]
|
|
|
+ for val in values:
|
|
|
+ key = (item_id, attr_name)
|
|
|
+ if key not in generated_attrs:
|
|
|
+ generated_attrs[key] = []
|
|
|
+ generated_attrs[key].append(f"{val['value']} (source: {val.get('source', 'visual')})")
|
|
|
+ elif isinstance(vis_attrs, list):
|
|
|
+ # Handle list of dicts format
|
|
|
+ for item in vis_attrs:
|
|
|
+ attr_name = item.get("attribute_name") or item.get("name")
|
|
|
+ if not attr_name: continue
|
|
|
+ value = item.get("value", "")
|
|
|
+ source = item.get("source", "visual")
|
|
|
+ key = (item_id, attr_name)
|
|
|
+ if key not in generated_attrs:
|
|
|
+ generated_attrs[key] = []
|
|
|
+ generated_attrs[key].append(f"{value} (source: {source})")
|
|
|
+
|
|
|
+
|
|
|
+ # Write attributes to Excel
|
|
|
+ for (attr_item_id, attr_name), gen_values in generated_attrs.items():
|
|
|
+ # Get original value from lookup (if it exists)
|
|
|
+ original_value = original_attrs_lookup.get((attr_item_id, attr_name), "")
|
|
|
+ # Combine multiple generated values into a single string
|
|
|
+ generated_value = "; ".join(gen_values) if gen_values else ""
|
|
|
+ # Write row
|
|
|
+ ws_attributes.cell(row=row_num, column=1, value=attr_item_id)
|
|
|
+ ws_attributes.cell(row=row_num, column=2, value=attr_name)
|
|
|
+ ws_attributes.cell(row=row_num, column=3, value=original_value)
|
|
|
+ ws_attributes.cell(row=row_num, column=4, value=generated_value)
|
|
|
+ processed_original_keys.add((attr_item_id, attr_name))
|
|
|
+ row_num += 1
|
|
|
+
|
|
|
+ # Add original attributes that have no generated values for this item_id
|
|
|
+ for (orig_item_id, orig_attr_name), orig_value in original_attrs_lookup.items():
|
|
|
+ if orig_item_id == item_id and (orig_item_id, orig_attr_name) not in processed_original_keys:
|
|
|
+ ws_attributes.cell(row=row_num, column=1, value=orig_item_id)
|
|
|
+ ws_attributes.cell(row=row_num, column=2, value=orig_attr_name)
|
|
|
+ ws_attributes.cell(row=row_num, column=3, value=orig_value)
|
|
|
+ ws_attributes.cell(row=row_num, column=4, value="") # No generated value
|
|
|
+ processed_original_keys.add((orig_item_id, orig_attr_name))
|
|
|
+ row_num += 1
|
|
|
+
|
|
|
+ # Add original attributes for products not included in the 'results' (e.g. if they didn't exist in product_list)
|
|
|
+ # All products should already be covered via product_list, so this pass is usually redundant, but it is kept for completeness.
|
|
|
+ for (orig_item_id, orig_attr_name), orig_value in original_attrs_lookup.items():
|
|
|
+ if (orig_item_id, orig_attr_name) not in processed_original_keys:
|
|
|
+ ws_attributes.cell(row=row_num, column=1, value=orig_item_id)
|
|
|
+ ws_attributes.cell(row=row_num, column=2, value=orig_attr_name)
|
|
|
+ ws_attributes.cell(row=row_num, column=3, value=orig_value)
|
|
|
+ ws_attributes.cell(row=row_num, column=4, value="Original value only (Product not processed in batch)")
|
|
|
+ row_num += 1
|
|
|
+
|
|
|
+
|
|
|
+ # Adjust column widths for attributes
|
|
|
+ for col_dim, width in zip(['A', 'B', 'C', 'D'], [15, 35, 50, 50]):
|
|
|
+ ws_attributes.column_dimensions[col_dim].width = width
|
|
|
+
|
|
|
+ # Save the generated Excel (replace existing)
|
|
|
+ save_path = os.path.join(settings.MEDIA_ROOT, 'generated_products.xlsx')
|
|
|
+ wb.save(save_path)
|
|
|
+ logger.info(f"Excel file successfully saved to {save_path}")
|
|
|
+
|
|
|
+ # Write SUCCESS status
|
|
|
+ write_status("SUCCESS")
|
|
|
+ logger.info("Background task finished successfully.")
|
|
|
+
|
|
|
+
|
|
|
+ except Exception as e:
|
|
|
+ # Log the critical error and write FAILED status
|
|
|
+ logger.exception("CRITICAL ERROR during background Excel generation process.")
|
|
|
+ write_status("FAILED", error_msg=str(e))
|
|
|
+
|
|
|
+
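Because the upload view below runs generate_product_excel_background() in a daemon thread, callers only learn the outcome from the status file written by write_status(). A minimal monitoring sketch, assuming direct filesystem access to MEDIA_ROOT; the helper name, timeout, and poll interval are illustrative choices, not part of the code above:

import json
import time

def wait_for_generated_excel(status_path, timeout_seconds=600, poll_seconds=5):
    """Poll the status JSON written by write_status() until it reports SUCCESS or FAILED."""
    deadline = time.time() + timeout_seconds
    while time.time() < deadline:
        try:
            with open(status_path) as f:
                status_data = json.load(f)
        except (FileNotFoundError, json.JSONDecodeError):
            status_data = None  # status file not written yet, or caught mid-write
        if status_data and status_data.get("status") in ("SUCCESS", "FAILED"):
            # Contains the processed/successful/failed counts, excel_path, log_path and error_message.
            return status_data
        time.sleep(poll_seconds)
    raise TimeoutError("Excel generation did not finish within the timeout")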
|
|
|
+# -------------------------------------------------------------------------------------------------
|
|
|
+
|
|
|
+class ProductUploadExcelView(APIView):
|
|
|
+ """
|
|
|
+ POST API to upload an Excel file with a 'Products' sheet and an optional 'Attribute_values' sheet.
+ Creates/updates products and attribute values in a transaction, then triggers background Excel generation.
|
|
|
"""
|
|
|
parser_classes = (MultiPartParser, FormParser)
|
|
|
|
|
|
@@ -613,10 +1222,11 @@ class ProductUploadExcelView(APIView):
|
|
|
|
|
|
# Check if required sheets exist
|
|
|
if 'Products' not in excel_file.sheet_names:
|
|
|
- return Response({
|
|
|
- 'error': "Missing 'Products' sheet",
|
|
|
- 'available_sheets': excel_file.sheet_names
|
|
|
- }, status=status.HTTP_400_BAD_REQUEST)
|
|
|
+ logger.error("Upload failed: missing 'Products' sheet in the uploaded file.")
|
|
|
+ return Response({
|
|
|
+ 'error': "Missing 'Products' sheet",
|
|
|
+ 'available_sheets': excel_file.sheet_names
|
|
|
+ }, status=status.HTTP_400_BAD_REQUEST)
|
|
|
|
|
|
# Read Products sheet
|
|
|
df_products = pd.read_excel(excel_file, sheet_name='Products')
|
|
|
@@ -624,37 +1234,35 @@ class ProductUploadExcelView(APIView):
|
|
|
|
|
|
# Check required columns for Products
|
|
|
expected_product_cols = {
|
|
|
- 'item_id',
|
|
|
- 'product_name',
|
|
|
- 'product_long_description',
|
|
|
- 'product_short_description',
|
|
|
- 'product_type',
|
|
|
- 'image_path'
|
|
|
+ 'item_id', 'product_name', 'product_long_description',
|
|
|
+ 'product_short_description', 'product_type', 'image_path'
|
|
|
}
|
|
|
|
|
|
if not expected_product_cols.issubset(df_products.columns):
|
|
|
- return Response({
|
|
|
- 'error': 'Missing required columns in Products sheet',
|
|
|
- 'required_columns': list(expected_product_cols),
|
|
|
- 'found_columns': list(df_products.columns)
|
|
|
- }, status=status.HTTP_400_BAD_REQUEST)
|
|
|
+ logger.error("Upload failed: missing required columns in Products sheet.")
|
|
|
+ return Response({
|
|
|
+ 'error': 'Missing required columns in Products sheet',
|
|
|
+ 'required_columns': list(expected_product_cols),
|
|
|
+ 'found_columns': list(df_products.columns)
|
|
|
+ }, status=status.HTTP_400_BAD_REQUEST)
|
|
|
|
|
|
# Read Attribute_values sheet if it exists
|
|
|
df_attributes = None
|
|
|
has_attributes_sheet = 'Attribute_values' in excel_file.sheet_names
|
|
|
|
|
|
if has_attributes_sheet:
|
|
|
- df_attributes = pd.read_excel(excel_file, sheet_name='Attribute_values')
|
|
|
- df_attributes.columns = [c.strip().lower().replace(' ', '_') for c in df_attributes.columns]
|
|
|
-
|
|
|
- # Check required columns for Attribute_values
|
|
|
- expected_attr_cols = {'item_id', 'attribute_name', 'original_value'}
|
|
|
- if not expected_attr_cols.issubset(df_attributes.columns):
|
|
|
- return Response({
|
|
|
- 'error': 'Missing required columns in Attribute_values sheet',
|
|
|
- 'required_columns': list(expected_attr_cols),
|
|
|
- 'found_columns': list(df_attributes.columns)
|
|
|
- }, status=status.HTTP_400_BAD_REQUEST)
|
|
|
+ df_attributes = pd.read_excel(excel_file, sheet_name='Attribute_values')
|
|
|
+ df_attributes.columns = [c.strip().lower().replace(' ', '_') for c in df_attributes.columns]
|
|
|
+
|
|
|
+ # Check required columns for Attribute_values
|
|
|
+ expected_attr_cols = {'item_id', 'attribute_name', 'original_value'}
|
|
|
+ if not expected_attr_cols.issubset(df_attributes.columns):
|
|
|
+ logger.error("Upload failed: missing required columns in Attribute_values sheet.")
|
|
|
+ return Response({
|
|
|
+ 'error': 'Missing required columns in Attribute_values sheet',
|
|
|
+ 'required_columns': list(expected_attr_cols),
|
|
|
+ 'found_columns': list(df_attributes.columns)
|
|
|
+ }, status=status.HTTP_400_BAD_REQUEST)
|
|
|
|
|
|
# Initialize counters
|
|
|
products_created = 0
|
|
|
@@ -667,133 +1275,158 @@ class ProductUploadExcelView(APIView):
|
|
|
|
|
|
# Use transaction to ensure atomicity
|
|
|
with transaction.atomic():
|
|
|
- # Process Products sheet
|
|
|
- for idx, row in df_products.iterrows():
|
|
|
- item_id = str(row.get('item_id', '')).strip()
|
|
|
- if not item_id:
|
|
|
- products_failed += 1
|
|
|
- errors.append(f"Products Row {idx + 2}: Missing item_id")
|
|
|
- continue
|
|
|
-
|
|
|
- try:
|
|
|
- defaults = {
|
|
|
- 'product_name': str(row.get('product_name', '')),
|
|
|
- 'product_long_description': str(row.get('product_long_description', '')),
|
|
|
- 'product_short_description': str(row.get('product_short_description', '')),
|
|
|
- 'product_type': str(row.get('product_type', '')),
|
|
|
- 'image_path': str(row.get('image_path', '')),
|
|
|
- }
|
|
|
-
|
|
|
- obj, created = Product.objects.update_or_create(
|
|
|
- item_id=item_id,
|
|
|
- defaults=defaults
|
|
|
- )
|
|
|
-
|
|
|
- if created:
|
|
|
- products_created += 1
|
|
|
- else:
|
|
|
- products_updated += 1
|
|
|
- except Exception as e:
|
|
|
- products_failed += 1
|
|
|
- errors.append(f"Products Row {idx + 2} (item_id: {item_id}): {str(e)}")
|
|
|
-
|
|
|
- # Process Attribute_values sheet if it exists
|
|
|
- if has_attributes_sheet and df_attributes is not None:
|
|
|
- # Group by item_id to optimize lookups
|
|
|
- item_ids_in_attrs = df_attributes['item_id'].unique()
|
|
|
-
|
|
|
- # Fetch all products at once
|
|
|
- existing_products = {
|
|
|
- p.item_id: p
|
|
|
- for p in Product.objects.filter(item_id__in=item_ids_in_attrs)
|
|
|
- }
|
|
|
-
|
|
|
- for idx, row in df_attributes.iterrows():
|
|
|
- item_id = str(row.get('item_id', '')).strip()
|
|
|
- attribute_name = str(row.get('attribute_name', '')).strip()
|
|
|
- original_value = str(row.get('original_value', '')).strip()
|
|
|
-
|
|
|
- if not item_id or not attribute_name:
|
|
|
- attributes_failed += 1
|
|
|
- errors.append(
|
|
|
- f"Attribute_values Row {idx + 2}: Missing item_id or attribute_name"
|
|
|
- )
|
|
|
- continue
|
|
|
-
|
|
|
- # Check if product exists
|
|
|
- product = existing_products.get(item_id)
|
|
|
- if not product:
|
|
|
- attributes_failed += 1
|
|
|
- errors.append(
|
|
|
- f"Attribute_values Row {idx + 2}: Product with item_id '{item_id}' not found. "
|
|
|
- "Make sure it exists in Products sheet."
|
|
|
- )
|
|
|
- continue
|
|
|
-
|
|
|
- try:
|
|
|
- attr_obj, created = ProductAttributeValue.objects.update_or_create(
|
|
|
- product=product,
|
|
|
- attribute_name=attribute_name,
|
|
|
- defaults={'original_value': original_value}
|
|
|
- )
|
|
|
-
|
|
|
- if created:
|
|
|
- attributes_created += 1
|
|
|
- else:
|
|
|
- attributes_updated += 1
|
|
|
- except Exception as e:
|
|
|
- attributes_failed += 1
|
|
|
- errors.append(
|
|
|
- f"Attribute_values Row {idx + 2} "
|
|
|
- f"(item_id: {item_id}, attribute: {attribute_name}): {str(e)}"
|
|
|
- )
|
|
|
-
|
|
|
- # Prepare response
|
|
|
+ # Process Products sheet
|
|
|
+ for idx, row in df_products.iterrows():
|
|
|
+ item_id = str(row.get('item_id', '')).strip()
|
|
|
+ product_type = str(row.get('product_type', '')).strip()
|
|
|
+
|
|
|
+ if not item_id:
|
|
|
+ products_failed += 1
|
|
|
+ errors.append(f"Products Row {idx + 2}: Missing item_id")
|
|
|
+ continue
|
|
|
+
|
|
|
+ try:
|
|
|
+ # Auto-create ProductType if provided and doesn't exist
|
|
|
+ if product_type:
|
|
|
+ ProductType.objects.get_or_create(name=product_type)
|
|
|
+
|
|
|
+ defaults = {
|
|
|
+ 'product_name': str(row.get('product_name', '')),
|
|
|
+ 'product_long_description': str(row.get('product_long_description', '')),
|
|
|
+ 'product_short_description': str(row.get('product_short_description', '')),
|
|
|
+ 'product_type': product_type,
|
|
|
+ 'image_path': str(row.get('image_path', '')),
|
|
|
+ }
|
|
|
+
|
|
|
+ obj, created = Product.objects.update_or_create(
|
|
|
+ item_id=item_id,
|
|
|
+ defaults=defaults
|
|
|
+ )
|
|
|
+
|
|
|
+ if created:
|
|
|
+ products_created += 1
|
|
|
+ else:
|
|
|
+ products_updated += 1
|
|
|
+ except Exception as e:
|
|
|
+ products_failed += 1
|
|
|
+ errors.append(f"Products Row {idx + 2} (item_id: {item_id}): {str(e)}")
|
|
|
+ logger.error(f"Error processing product {item_id} in Products sheet: {e}")
|
|
|
+
|
|
|
+
|
|
|
+ # Process Attribute_values sheet if it exists
|
|
|
+ if has_attributes_sheet and df_attributes is not None:
|
|
|
+ # Group by item_id to optimize lookups
|
|
|
+ item_ids_in_attrs = df_attributes['item_id'].astype(str).unique()
|
|
|
+
|
|
|
+ # Fetch all products at once
|
|
|
+ existing_products = {
|
|
|
+ p.item_id: p
|
|
|
+ for p in Product.objects.filter(item_id__in=item_ids_in_attrs)
|
|
|
+ }
|
|
|
+
|
|
|
+ for idx, row in df_attributes.iterrows():
|
|
|
+ item_id = str(row.get('item_id', '')).strip()
|
|
|
+ attribute_name = str(row.get('attribute_name', '')).strip()
|
|
|
+ original_value = str(row.get('original_value', '')).strip()
|
|
|
+
|
|
|
+ if not item_id or not attribute_name:
|
|
|
+ attributes_failed += 1
|
|
|
+ errors.append(
|
|
|
+ f"Attribute_values Row {idx + 2}: Missing item_id or attribute_name"
|
|
|
+ )
|
|
|
+ continue
|
|
|
+
|
|
|
+ # Check if product exists
|
|
|
+ product = existing_products.get(item_id)
|
|
|
+ if not product:
|
|
|
+ attributes_failed += 1
|
|
|
+ errors.append(
|
|
|
+ f"Attribute_values Row {idx + 2}: Product with item_id '{item_id}' not found. "
|
|
|
+ "Make sure it exists in Products sheet."
|
|
|
+ )
|
|
|
+ continue
|
|
|
+
|
|
|
+ try:
|
|
|
+ attr_obj, created = ProductAttributeValue.objects.update_or_create(
|
|
|
+ product=product,
|
|
|
+ attribute_name=attribute_name,
|
|
|
+ defaults={'original_value': original_value}
|
|
|
+ )
|
|
|
+
|
|
|
+ if created:
|
|
|
+ attributes_created += 1
|
|
|
+ else:
|
|
|
+ attributes_updated += 1
|
|
|
+ except Exception as e:
|
|
|
+ attributes_failed += 1
|
|
|
+ errors.append(
|
|
|
+ f"Attribute_values Row {idx + 2} "
|
|
|
+ f"(item_id: {item_id}, attribute: {attribute_name}): {str(e)}"
|
|
|
+ )
|
|
|
+ logger.error(f"Error processing attribute {attribute_name} for product {item_id}: {e}")
|
|
|
+
|
|
|
+ # Prepare response data
|
|
|
response_data = {
|
|
|
- 'message': 'Upload completed successfully',
|
|
|
+ 'message': 'Upload completed',
|
|
|
'products': {
|
|
|
'created': products_created,
|
|
|
'updated': products_updated,
|
|
|
'failed': products_failed,
|
|
|
'total_processed': products_created + products_updated + products_failed
|
|
|
- }
|
|
|
+ },
|
|
|
+ 'attribute_values': {
|
|
|
+ 'created': attributes_created,
|
|
|
+ 'updated': attributes_updated,
|
|
|
+ 'failed': attributes_failed,
|
|
|
+ 'total_processed': attributes_created + attributes_updated + attributes_failed
|
|
|
+ } if has_attributes_sheet else {'message': 'Attribute_values sheet not found in Excel file'},
|
|
|
+ 'generated_excel_status': 'Excel generation started in the background.'  # placeholder; overwritten below based on the upload outcome
|
|
|
}
|
|
|
|
|
|
- if has_attributes_sheet:
|
|
|
- response_data['attribute_values'] = {
|
|
|
- 'created': attributes_created,
|
|
|
- 'updated': attributes_updated,
|
|
|
- 'failed': attributes_failed,
|
|
|
- 'total_processed': attributes_created + attributes_updated + attributes_failed
|
|
|
- }
|
|
|
- else:
|
|
|
- response_data['attribute_values'] = {
|
|
|
- 'message': 'Attribute_values sheet not found in Excel file'
|
|
|
- }
|
|
|
-
|
|
|
if errors:
|
|
|
- response_data['errors'] = errors[:50] # Limit to first 50 errors
|
|
|
+ response_data['errors'] = errors[:50]
|
|
|
if len(errors) > 50:
|
|
|
response_data['errors'].append(f"... and {len(errors) - 50} more errors")
|
|
|
|
|
|
- # Determine status code
|
|
|
- if products_failed > 0 or attributes_failed > 0:
|
|
|
- status_code = status.HTTP_207_MULTI_STATUS
|
|
|
+ # Determine status code for upload
|
|
|
+ upload_status = status.HTTP_201_CREATED if products_failed == 0 and attributes_failed == 0 else status.HTTP_207_MULTI_STATUS
|
|
|
+
|
|
|
+ # Start background thread for Excel generation if upload was successful
|
|
|
+ if products_failed == 0 and attributes_failed == 0:
|
|
|
+ logger.info("API call successful. Triggering background Excel generation thread.")
|
|
|
+ threading.Thread(target=generate_product_excel_background, daemon=True).start()
|
|
|
+
|
|
|
+ # Update response to provide monitoring paths
|
|
|
+ response_data['generated_excel_status'] = 'Background Excel generation triggered successfully.'
|
|
|
+ response_data['monitoring'] = {
|
|
|
+ 'excel_file': os.path.join(settings.MEDIA_URL, 'generated_products.xlsx'),
|
|
|
+ 'status_file': os.path.join(settings.MEDIA_URL, 'excel_generation_status.json'),
|
|
|
+ 'log_file': os.path.join(settings.MEDIA_URL, 'excel_generation.log'),
|
|
|
+ 'note': 'These files will be available once the background process completes.'
|
|
|
+ }
|
|
|
else:
|
|
|
- status_code = status.HTTP_201_CREATED
|
|
|
+ logger.warning(f"API call finished with errors ({products_failed} products, {attributes_failed} attributes). Not triggering background excel generation.")
|
|
|
+ response_data['generated_excel_status'] = 'Background Excel generation was NOT triggered due to upload errors. Fix upload errors and re-upload.'
|
|
|
|
|
|
- return Response(response_data, status=status_code)
|
|
|
+
|
|
|
+ return Response(response_data, status=upload_status)
|
|
|
|
|
|
except pd.errors.EmptyDataError:
|
|
|
+ logger.error('The uploaded Excel file is empty or invalid.')
|
|
|
return Response({
|
|
|
'error': 'The uploaded Excel file is empty or invalid'
|
|
|
}, status=status.HTTP_400_BAD_REQUEST)
|
|
|
except Exception as e:
|
|
|
+ logger.exception('An unexpected error occurred while processing the file.')
|
|
|
return Response({
|
|
|
- 'error': f'An error occurred while processing the file: {str(e)}'
|
|
|
+ 'error': f'An unexpected error occurred while processing the file: {str(e)}'
|
|
|
}, status=status.HTTP_500_INTERNAL_SERVER_ERROR)
|
|
|
|
|
|
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
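For completeness, a hedged client-side sketch for exercising ProductUploadExcelView: it builds the two expected sheets with pandas and posts the workbook as multipart form data. The endpoint URL is an assumption (the project's urls.py is not part of this diff), and the sample rows are placeholders only.

import io
import pandas as pd
import requests

def upload_products(url="http://localhost:8000/api/products/upload-excel/"):  # assumed URL
    # Column and sheet names mirror what the view above validates.
    products = pd.DataFrame([{
        "item_id": "SKU-001",
        "product_name": "Example Mug",
        "product_long_description": "A ceramic mug used here purely as sample data.",
        "product_short_description": "Ceramic mug",
        "product_type": "Mug",
        "image_path": "https://example.com/mug.jpg",
    }])
    attributes = pd.DataFrame([
        {"item_id": "SKU-001", "attribute_name": "Color", "original_value": "Blue"},
    ])

    buffer = io.BytesIO()
    with pd.ExcelWriter(buffer, engine="openpyxl") as writer:
        products.to_excel(writer, sheet_name="Products", index=False)
        attributes.to_excel(writer, sheet_name="Attribute_values", index=False)
    buffer.seek(0)

    response = requests.post(url, files={"file": ("products.xlsx", buffer)})
    # Expect 201 when everything succeeded, 207 when some rows failed; the JSON body carries
    # per-sheet created/updated/failed counts plus the background-generation status and paths.
    print(response.status_code, response.json())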
|
|
|
# Add this view to your views.py for downloading a template
|
|
|
|
|
|
from django.http import HttpResponse
|