Excel downloading feature added

Harshit Pathak 3 months ago
parent
commit
a7dfa6fd4c

+ 767 - 134
attr_extraction/views.py

@@ -592,13 +592,622 @@ import pandas as pd
 from .models import Product, ProductAttributeValue
 
 
-class ProductUploadExcelView(APIView):
+# class ProductUploadExcelView(APIView):
+#     """
+#     POST API to upload an Excel file with two sheets:
+#     1. 'Products' sheet - Product details
+#     2. 'Attribute_values' sheet - Original attribute values
+    
+#     Creates/updates both products and their attribute values in a single transaction.
+#     """
+#     parser_classes = (MultiPartParser, FormParser)
+
+#     def post(self, request, *args, **kwargs):
+#         file_obj = request.FILES.get('file')
+#         if not file_obj:
+#             return Response({'error': 'No file provided'}, status=status.HTTP_400_BAD_REQUEST)
+
+#         try:
+#             # Read all sheets from Excel file
+#             excel_file = pd.ExcelFile(file_obj)
+            
+#             # Check if required sheets exist
+#             if 'Products' not in excel_file.sheet_names:
+#                 return Response({
+#                     'error': "Missing 'Products' sheet",
+#                     'available_sheets': excel_file.sheet_names
+#                 }, status=status.HTTP_400_BAD_REQUEST)
+            
+#             # Read Products sheet
+#             df_products = pd.read_excel(excel_file, sheet_name='Products')
+#             df_products.columns = [c.strip().lower().replace(' ', '_') for c in df_products.columns]
+
+#             # Check required columns for Products
+#             expected_product_cols = {
+#                 'item_id',
+#                 'product_name',
+#                 'product_long_description',
+#                 'product_short_description',
+#                 'product_type',
+#                 'image_path'
+#             }
+
+#             if not expected_product_cols.issubset(df_products.columns):
+#                 return Response({
+#                     'error': 'Missing required columns in Products sheet',
+#                     'required_columns': list(expected_product_cols),
+#                     'found_columns': list(df_products.columns)
+#                 }, status=status.HTTP_400_BAD_REQUEST)
+
+#             # Read Attribute_values sheet if it exists
+#             df_attributes = None
+#             has_attributes_sheet = 'Attribute_values' in excel_file.sheet_names
+            
+#             if has_attributes_sheet:
+#                 df_attributes = pd.read_excel(excel_file, sheet_name='Attribute_values')
+#                 df_attributes.columns = [c.strip().lower().replace(' ', '_') for c in df_attributes.columns]
+                
+#                 # Check required columns for Attribute_values
+#                 expected_attr_cols = {'item_id', 'attribute_name', 'original_value'}
+#                 if not expected_attr_cols.issubset(df_attributes.columns):
+#                     return Response({
+#                         'error': 'Missing required columns in Attribute_values sheet',
+#                         'required_columns': list(expected_attr_cols),
+#                         'found_columns': list(df_attributes.columns)
+#                     }, status=status.HTTP_400_BAD_REQUEST)
+
+#             # Initialize counters
+#             products_created = 0
+#             products_updated = 0
+#             attributes_created = 0
+#             attributes_updated = 0
+#             products_failed = 0
+#             attributes_failed = 0
+#             errors = []
+
+#             # Use transaction to ensure atomicity
+#             with transaction.atomic():
+#                 # Process Products sheet
+#                 for idx, row in df_products.iterrows():
+#                     item_id = str(row.get('item_id', '')).strip()
+#                     if not item_id:
+#                         products_failed += 1
+#                         errors.append(f"Products Row {idx + 2}: Missing item_id")
+#                         continue
+
+#                     try:
+#                         defaults = {
+#                             'product_name': str(row.get('product_name', '')),
+#                             'product_long_description': str(row.get('product_long_description', '')),
+#                             'product_short_description': str(row.get('product_short_description', '')),
+#                             'product_type': str(row.get('product_type', '')),
+#                             'image_path': str(row.get('image_path', '')),
+#                         }
+
+#                         obj, created = Product.objects.update_or_create(
+#                             item_id=item_id,
+#                             defaults=defaults
+#                         )
+
+#                         if created:
+#                             products_created += 1
+#                         else:
+#                             products_updated += 1
+#                     except Exception as e:
+#                         products_failed += 1
+#                         errors.append(f"Products Row {idx + 2} (item_id: {item_id}): {str(e)}")
+
+#                 # Process Attribute_values sheet if it exists
+#                 if has_attributes_sheet and df_attributes is not None:
+#                     # Group by item_id to optimize lookups
+#                     item_ids_in_attrs = df_attributes['item_id'].unique()
+                    
+#                     # Fetch all products at once
+#                     existing_products = {
+#                         p.item_id: p 
+#                         for p in Product.objects.filter(item_id__in=item_ids_in_attrs)
+#                     }
+
+#                     for idx, row in df_attributes.iterrows():
+#                         item_id = str(row.get('item_id', '')).strip()
+#                         attribute_name = str(row.get('attribute_name', '')).strip()
+#                         original_value = str(row.get('original_value', '')).strip()
+
+#                         if not item_id or not attribute_name:
+#                             attributes_failed += 1
+#                             errors.append(
+#                                 f"Attribute_values Row {idx + 2}: Missing item_id or attribute_name"
+#                             )
+#                             continue
+
+#                         # Check if product exists
+#                         product = existing_products.get(item_id)
+#                         if not product:
+#                             attributes_failed += 1
+#                             errors.append(
+#                                 f"Attribute_values Row {idx + 2}: Product with item_id '{item_id}' not found. "
+#                                 "Make sure it exists in Products sheet."
+#                             )
+#                             continue
+
+#                         try:
+#                             attr_obj, created = ProductAttributeValue.objects.update_or_create(
+#                                 product=product,
+#                                 attribute_name=attribute_name,
+#                                 defaults={'original_value': original_value}
+#                             )
+
+#                             if created:
+#                                 attributes_created += 1
+#                             else:
+#                                 attributes_updated += 1
+#                         except Exception as e:
+#                             attributes_failed += 1
+#                             errors.append(
+#                                 f"Attribute_values Row {idx + 2} "
+#                                 f"(item_id: {item_id}, attribute: {attribute_name}): {str(e)}"
+#                             )
+
+#             # Prepare response
+#             response_data = {
+#                 'message': 'Upload completed successfully',
+#                 'products': {
+#                     'created': products_created,
+#                     'updated': products_updated,
+#                     'failed': products_failed,
+#                     'total_processed': products_created + products_updated + products_failed
+#                 }
+#             }
+
+#             if has_attributes_sheet:
+#                 response_data['attribute_values'] = {
+#                     'created': attributes_created,
+#                     'updated': attributes_updated,
+#                     'failed': attributes_failed,
+#                     'total_processed': attributes_created + attributes_updated + attributes_failed
+#                 }
+#             else:
+#                 response_data['attribute_values'] = {
+#                     'message': 'Attribute_values sheet not found in Excel file'
+#                 }
+
+#             if errors:
+#                 response_data['errors'] = errors[:50]  # Limit to first 50 errors
+#                 if len(errors) > 50:
+#                     response_data['errors'].append(f"... and {len(errors) - 50} more errors")
+
+#             # Determine status code
+#             if products_failed > 0 or attributes_failed > 0:
+#                 status_code = status.HTTP_207_MULTI_STATUS
+#             else:
+#                 status_code = status.HTTP_201_CREATED
+
+#             return Response(response_data, status=status_code)
+
+#         except pd.errors.EmptyDataError:
+#             return Response({
+#                 'error': 'The uploaded Excel file is empty or invalid'
+#             }, status=status.HTTP_400_BAD_REQUEST)
+#         except Exception as e:
+#             return Response({
+#                 'error': f'An error occurred while processing the file: {str(e)}'
+#             }, status=status.HTTP_500_INTERNAL_SERVER_ERROR)
+
+
+
+
+
+
+
+import logging
+import json
+from rest_framework.views import APIView
+from rest_framework.response import Response
+from rest_framework import status
+from rest_framework.parsers import MultiPartParser, FormParser
+from django.db import transaction
+from django.db.models import Prefetch, F
+import pandas as pd
+# Import ALL your models
+from .models import Product, ProductAttributeValue, ProductType, ProductAttribute, AttributePossibleValue
+from .services import ProductAttributeService
+from .ocr_service import OCRService
+from .visual_processing_service import VisualProcessingService
+from openpyxl import Workbook
+from openpyxl.styles import Font, PatternFill, Alignment
+from django.conf import settings
+import os
+import threading
+from datetime import datetime
+
+# --- Logging Setup ---
+# Define log and status file paths in MEDIA_ROOT
+LOG_FILE_PATH = os.path.join(settings.MEDIA_ROOT, 'excel_generation.log')
+STATUS_FILE_PATH = os.path.join(settings.MEDIA_ROOT, 'excel_generation_status.json')
+
+# Ensure the MEDIA_ROOT exists for files to be saved
+os.makedirs(settings.MEDIA_ROOT, exist_ok=True)
+
+# Configure basic logging
+logging.basicConfig(
+    filename=LOG_FILE_PATH,
+    level=logging.INFO,
+    format='%(asctime)s - %(levelname)s - %(message)s'
+)
+logger = logging.getLogger(__name__)
+
+# -------------------------------------------------------------------------------------------------
+
+def generate_product_excel_background():
+    """
+    Function to perform batch attribute extraction for all products and generate an Excel file.
+    Runs in a background thread to avoid blocking the API response.
+    Logs success/failure and saves a status file for external monitoring.
     """
-    POST API to upload an Excel file with two sheets:
-    1. 'Products' sheet - Product details
-    2. 'Attribute_values' sheet - Original attribute values
+    logger.info(f"[{datetime.now().isoformat()}] Starting background product Excel generation and attribute extraction.")
     
-    Creates/updates both products and their attribute values in a single transaction.
+    successful = 0
+    failed = 0
+    results = [] # To store detailed extraction results for Excel sheet 2
+    
+    # Function to write status file (SUCCESS/FAILED)
+    def write_status(status_type, error_msg=None):
+        status_data = {
+            "status": status_type,
+            "timestamp": datetime.now().isoformat(),
+            "products_processed": successful + failed,
+            "products_successful": successful,
+            "products_failed": failed,
+            "excel_path": os.path.join(settings.MEDIA_URL, 'generated_products.xlsx') if status_type == "SUCCESS" else None,
+            "log_path": os.path.join(settings.MEDIA_URL, 'excel_generation.log'),
+            "error_message": error_msg
+        }
+        try:
+            with open(STATUS_FILE_PATH, 'w') as f:
+                json.dump(status_data, f, indent=4)
+        except Exception as e:
+            logger.exception(f"CRITICAL ERROR: Failed to write status file at {STATUS_FILE_PATH}: {e}")
+
+    try:
+        # 1. PREFETCH all necessary related data to minimize database queries
+        
+        # Prefetch possible values for mandatory attributes
+        possible_values_prefetch = Prefetch(
+            'attributes',
+            queryset=ProductAttribute.objects.filter(is_mandatory=True).prefetch_related('possible_values')
+        )
+        
+        # Fetch all ProductTypes with their mandatory attributes and possible values
+        all_product_types = ProductType.objects.prefetch_related(possible_values_prefetch)
+        product_type_map = {
+            pt.name: pt for pt in all_product_types
+        }
+
+        # Prepare product_list for batch extraction
+        all_products = Product.objects.all()
+        product_list = []
+        
+        for p in all_products:
+            # mandatory_attrs will be the dictionary required by the service
+            mandatory_attrs_dict = {}
+            product_type_name = p.product_type.strip() if p.product_type else None
+            
+            if product_type_name and product_type_name in product_type_map:
+                pt = product_type_map[product_type_name]
+                
+                # Build the mandatory_attrs dictionary: { "Attribute Name": ["Value 1", "Value 2"], ... }
+                for attr in pt.attributes.all(): # .all() here works because we used Prefetch for 'attributes'
+                    # attr.possible_values.all() works because we used prefetch_related('possible_values')
+                    mandatory_attrs_dict[attr.name] = [
+                        pv.value for pv in attr.possible_values.all()
+                    ]
+            
+            product_list.append({
+                "item_id": p.item_id,
+                "product_type_name": product_type_name,
+                "mandatory_attrs": mandatory_attrs_dict # <-- FIX: Pass the dictionary here
+            })
+
+        # Batch settings (using defaults)
+        model = "llama-3.1-8b-instant"
+        extract_additional = True
+        process_image = False
+        multiple = []
+        threshold_abs = 0.65
+        margin = 0.15
+        use_dynamic_thresholds = True
+        use_adaptive_margin = True
+        use_semantic_clustering = True
+
+        # Batch extraction logic
+        item_ids = [p['item_id'] for p in product_list]
+        products_queryset = Product.objects.filter(item_id__in=item_ids)
+        product_map = {product.item_id: product for product in products_queryset}
+        found_ids = set(product_map.keys())
+
+        for product_entry in product_list:
+            item_id = product_entry['item_id']
+            # FIX: mandatory_attrs is now correctly a dictionary (or an empty dictionary)
+            mandatory_attrs = product_entry['mandatory_attrs'] 
+
+            if item_id not in found_ids:
+                failed += 1
+                results.append({
+                    "product_id": item_id,
+                    "error": "Product not found in database"
+                })
+                logger.warning(f"Product {item_id} not found in database. Skipping extraction.")
+                continue
+
+            product = product_map[item_id]
+
+            try:
+                title = product.product_name
+                short_desc = product.product_short_description
+                long_desc = product.product_long_description
+                image_url = product.image_path
+
+                ocr_results = None
+                ocr_text = None
+                visual_results = None
+
+                if process_image and image_url:
+                    logger.info(f"Processing image for product {item_id}...")
+                    # OCR Processing
+                    ocr_service = OCRService()
+                    ocr_results = ocr_service.process_image(image_url)
+
+                    if ocr_results and ocr_results.get("detected_text"):
+                        # NOTE: Assuming ProductAttributeService.extract_attributes_from_ocr exists
+                        ocr_attrs = ProductAttributeService.extract_attributes_from_ocr(
+                             ocr_results, model
+                        )
+                        ocr_results["extracted_attributes"] = ocr_attrs
+                        ocr_text = "\n".join([
+                             f"{item['text']} (confidence: {item['confidence']:.2f})"
+                             for item in ocr_results["detected_text"]
+                        ])
+
+                    # Visual Processing
+                    visual_service = VisualProcessingService()
+                    product_type_hint = product.product_type if product.product_type else None
+                    visual_results = visual_service.process_image(image_url, product_type_hint)
+
+                    if visual_results and visual_results.get('visual_attributes'):
+                        visual_results['visual_attributes'] = ProductAttributeService.format_visual_attributes(
+                            visual_results['visual_attributes']
+                        )
+                    logger.info(f"Image processing done for product {item_id}.")
+
+
+                # Combine product text with source tracking
+                product_text, source_map = ProductAttributeService.combine_product_text(
+                    title=title,
+                    short_desc=short_desc,
+                    long_desc=long_desc,
+                    ocr_text=ocr_text
+                )
+
+                # Attribute Extraction with source tracking
+                extracted = ProductAttributeService.extract_attributes(
+                    product_text=product_text,
+                    mandatory_attrs=mandatory_attrs, # <-- This is now the dictionary with possible values
+                    source_map=source_map,
+                    model=model,
+                    extract_additional=extract_additional,
+                    multiple=multiple,
+                    threshold_abs=threshold_abs,
+                    margin=margin,
+                    use_dynamic_thresholds=use_dynamic_thresholds,
+                    use_adaptive_margin=use_adaptive_margin,
+                    use_semantic_clustering=use_semantic_clustering
+                )
+
+                result = {
+                    "product_id": item_id,
+                    "mandatory": extracted.get("mandatory", {}),
+                    "additional": extracted.get("additional", {}),
+                }
+
+                if ocr_results:
+                    result["ocr_results"] = ocr_results
+
+                if visual_results:
+                    result["visual_results"] = visual_results
+
+                results.append(result)
+                successful += 1
+                logger.info(f"Attribute extraction successful for product {item_id}.")
+
+            except Exception as e:
+                failed += 1
+                results.append({
+                    "product_id": item_id,
+                    "error": str(e)
+                })
+                # Original Error: AttributeError: 'list' object has no attribute 'items'
+                # This should now be fixed, but we keep the robust exception handling.
+                logger.exception(f"Error during attribute extraction for product {item_id}.")
+
+        logger.info(f"Batch extraction phase complete. Successful: {successful}, Failed: {failed}")
+        
+        # --------------------------------------------------------------------------------
+        # Generate and save the Excel file
+        # --------------------------------------------------------------------------------
+        wb = Workbook()
+
+        # Sheet 1: Products (from DB)
+        ws_products = wb.active
+        ws_products.title = "Products"
+        products_headers = ['ITEM ID', 'PRODUCT NAME', 'PRODUCT TYPE', 'Product Short Description', 'Product Long Description', 'image_path']
+        header_fill = PatternFill(start_color="366092", end_color="366092", fill_type="solid")
+        header_font = Font(bold=True, color="FFFFFF")
+
+        for col_num, header in enumerate(products_headers, 1):
+            cell = ws_products.cell(row=1, column=col_num)
+            cell.value = header
+            cell.fill = header_fill
+            cell.font = header_font
+            cell.alignment = Alignment(horizontal="center", vertical="center")
+
+        all_products_db = Product.objects.all()
+        for row_num, product in enumerate(all_products_db, 2):
+            ws_products.cell(row=row_num, column=1, value=product.item_id)
+            ws_products.cell(row=row_num, column=2, value=product.product_name)
+            ws_products.cell(row=row_num, column=3, value=product.product_type)
+            ws_products.cell(row=row_num, column=4, value=product.product_short_description)
+            ws_products.cell(row=row_num, column=5, value=product.product_long_description)
+            ws_products.cell(row=row_num, column=6, value=product.image_path)
+
+        # Adjust column widths
+        for col_dim, width in zip(['A', 'B', 'C', 'D', 'E', 'F'], [15, 25, 15, 35, 50, 45]):
+             ws_products.column_dimensions[col_dim].width = width
+
+        # Sheet 2: Attribute_values (item_id, attribute_name, original_value, generated_value)
+        ws_attributes = wb.create_sheet("Attribute_values")
+        attributes_headers = ['item_id', 'attribute_name', 'original_value', 'generated_value']
+        for col_num, header in enumerate(attributes_headers, 1):
+            cell = ws_attributes.cell(row=1, column=col_num)
+            cell.value = header
+            cell.fill = header_fill
+            cell.font = header_font
+            cell.alignment = Alignment(horizontal="center", vertical="center")
+
+        # Fetch all original attributes
+        row_num = 2
+        all_original_attrs = ProductAttributeValue.objects.all()
+        # Create a lookup for original attributes by item_id and attribute_name
+        original_attrs_lookup = {
+            (attr.product.item_id, attr.attribute_name): attr.original_value
+            for attr in all_original_attrs
+        }
+
+        # Add attributes (original and generated)
+        processed_original_keys = set()
+        for res in results:
+            item_id = res["product_id"]
+
+            if "error" in res:
+                # Add existing original attributes for failed products to the sheet
+                for (orig_item_id, orig_attr_name), orig_value in original_attrs_lookup.items():
+                    if orig_item_id == item_id:
+                        ws_attributes.cell(row=row_num, column=1, value=orig_item_id)
+                        ws_attributes.cell(row=row_num, column=2, value=orig_attr_name)
+                        ws_attributes.cell(row=row_num, column=3, value=orig_value)
+                        ws_attributes.cell(row=row_num, column=4, value=f"Extraction Failed: {res['error']}")
+                        processed_original_keys.add((orig_item_id, orig_attr_name))
+                        row_num += 1
+                continue
+
+            # Combine all generated attributes (mandatory, additional, OCR, visual)
+            generated_attrs = {}
+            for cat in ["mandatory", "additional"]:
+                attrs = res.get(cat, {})
+                for attr_name, values in attrs.items():
+                    for val in values:
+                        key = (item_id, attr_name)
+                        if key not in generated_attrs:
+                            generated_attrs[key] = []
+                        generated_attrs[key].append(f"{val['value']} (source: {val['source']})")
+
+            # OCR extracted
+            ocr = res.get("ocr_results")
+            if ocr and "extracted_attributes" in ocr and isinstance(ocr["extracted_attributes"], dict):
+                for attr_name, values in ocr["extracted_attributes"].items():
+                    for val in values:
+                        key = (item_id, attr_name)
+                        if key not in generated_attrs:
+                            generated_attrs[key] = []
+                        generated_attrs[key].append(f"{val['value']} (source: {val['source']})")
+
+            # Visual extracted
+            visual = res.get("visual_results")
+            if visual and "visual_attributes" in visual:
+                vis_attrs = visual["visual_attributes"]
+                if isinstance(vis_attrs, dict):
+                    # Handle dict format where value might be a list of dicts or a single value
+                    for attr_name, values in vis_attrs.items():
+                        if not isinstance(values, list):
+                            values = [{"value": values, "source": "visual"}]
+                        for val in values:
+                            key = (item_id, attr_name)
+                            if key not in generated_attrs:
+                                generated_attrs[key] = []
+                            generated_attrs[key].append(f"{val['value']} (source: {val.get('source', 'visual')})")
+                elif isinstance(vis_attrs, list):
+                    # Handle list of dicts format
+                    for item in vis_attrs:
+                        attr_name = item.get("attribute_name") or item.get("name")
+                        if not attr_name: continue
+                        value = item.get("value", "")
+                        source = item.get("source", "visual")
+                        key = (item_id, attr_name)
+                        if key not in generated_attrs:
+                            generated_attrs[key] = []
+                        generated_attrs[key].append(f"{value} (source: {source})")
+
+
+            # Write attributes to Excel
+            for (attr_item_id, attr_name), gen_values in generated_attrs.items():
+                # Get original value from lookup (if it exists)
+                original_value = original_attrs_lookup.get((attr_item_id, attr_name), "")
+                # Combine multiple generated values into a single string
+                generated_value = "; ".join(gen_values) if gen_values else ""
+                # Write row
+                ws_attributes.cell(row=row_num, column=1, value=attr_item_id)
+                ws_attributes.cell(row=row_num, column=2, value=attr_name)
+                ws_attributes.cell(row=row_num, column=3, value=original_value)
+                ws_attributes.cell(row=row_num, column=4, value=generated_value)
+                processed_original_keys.add((attr_item_id, attr_name))
+                row_num += 1
+
+            # Add original attributes that have no generated values for this item_id
+            for (orig_item_id, orig_attr_name), orig_value in original_attrs_lookup.items():
+                if orig_item_id == item_id and (orig_item_id, orig_attr_name) not in processed_original_keys:
+                    ws_attributes.cell(row=row_num, column=1, value=orig_item_id)
+                    ws_attributes.cell(row=row_num, column=2, value=orig_attr_name)
+                    ws_attributes.cell(row=row_num, column=3, value=orig_value)
+                    ws_attributes.cell(row=row_num, column=4, value="") # No generated value
+                    processed_original_keys.add((orig_item_id, orig_attr_name))
+                    row_num += 1
+        
+        # Add original attributes for products that never made it into 'results'
+        # (every product should already be in product_list, so this pass is likely redundant, but it is kept as a safety net)
+        for (orig_item_id, orig_attr_name), orig_value in original_attrs_lookup.items():
+            if (orig_item_id, orig_attr_name) not in processed_original_keys:
+                ws_attributes.cell(row=row_num, column=1, value=orig_item_id)
+                ws_attributes.cell(row=row_num, column=2, value=orig_attr_name)
+                ws_attributes.cell(row=row_num, column=3, value=orig_value)
+                ws_attributes.cell(row=row_num, column=4, value="Original value only (Product not processed in batch)")
+                row_num += 1
+
+
+        # Adjust column widths for attributes
+        for col_dim, width in zip(['A', 'B', 'C', 'D'], [15, 35, 50, 50]):
+             ws_attributes.column_dimensions[col_dim].width = width
+
+        # Save the generated Excel (replace existing)
+        save_path = os.path.join(settings.MEDIA_ROOT, 'generated_products.xlsx')
+        wb.save(save_path)
+        logger.info(f"Excel file successfully saved to {save_path}")
+        
+        # Write SUCCESS status
+        write_status("SUCCESS")
+        logger.info("Background task finished successfully.")
+
+
+    except Exception as e:
+        # Log the critical error and write FAILED status
+        logger.exception("CRITICAL ERROR during background Excel generation process.")
+        write_status("FAILED", error_msg=str(e))
+
+
+# -------------------------------------------------------------------------------------------------
+
+class ProductUploadExcelView(APIView):
+    """
+    POST API to upload an Excel file; a clean upload also triggers background Excel generation.
     """
     parser_classes = (MultiPartParser, FormParser)
 
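Note: because generate_product_excel_background runs in a daemon thread, the API response can only report that generation was triggered; completion has to be observed through the status file. Below is a minimal sketch of a read-only status endpoint; the view class (ExcelGenerationStatusView) and its route are hypothetical and not part of this commit, and it reuses the module-level STATUS_FILE_PATH defined above:

    import json
    import os
    from rest_framework import status
    from rest_framework.response import Response
    from rest_framework.views import APIView

    class ExcelGenerationStatusView(APIView):
        """Serve the JSON written by write_status() in the background task."""
        def get(self, request, *args, **kwargs):
            if not os.path.exists(STATUS_FILE_PATH):
                # No run has completed (or ever started) yet
                return Response({'status': 'PENDING'}, status=status.HTTP_404_NOT_FOUND)
            with open(STATUS_FILE_PATH) as f:
                return Response(json.load(f))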
@@ -613,10 +1222,11 @@ class ProductUploadExcelView(APIView):
             
             # Check if required sheets exist
             if 'Products' not in excel_file.sheet_names:
-                return Response({
-                    'error': "Missing 'Products' sheet",
-                    'available_sheets': excel_file.sheet_names
-                }, status=status.HTTP_400_BAD_REQUEST)
+                 logger.error("Upload failed: Missing 'Products' sheet in file.")
+                 return Response({
+                     'error': "Missing 'Products' sheet",
+                     'available_sheets': excel_file.sheet_names
+                 }, status=status.HTTP_400_BAD_REQUEST)
             
             # Read Products sheet
             df_products = pd.read_excel(excel_file, sheet_name='Products')
@@ -624,37 +1234,35 @@ class ProductUploadExcelView(APIView):
 
             # Check required columns for Products
             expected_product_cols = {
-                'item_id',
-                'product_name',
-                'product_long_description',
-                'product_short_description',
-                'product_type',
-                'image_path'
+                 'item_id', 'product_name', 'product_long_description',
+                 'product_short_description', 'product_type', 'image_path'
             }
 
             if not expected_product_cols.issubset(df_products.columns):
-                return Response({
-                    'error': 'Missing required columns in Products sheet',
-                    'required_columns': list(expected_product_cols),
-                    'found_columns': list(df_products.columns)
-                }, status=status.HTTP_400_BAD_REQUEST)
+                 logger.error("Upload failed: Missing required columns in Products sheet.")
+                 return Response({
+                     'error': 'Missing required columns in Products sheet',
+                     'required_columns': list(expected_product_cols),
+                     'found_columns': list(df_products.columns)
+                 }, status=status.HTTP_400_BAD_REQUEST)
 
             # Read Attribute_values sheet if it exists
             df_attributes = None
             has_attributes_sheet = 'Attribute_values' in excel_file.sheet_names
             
             if has_attributes_sheet:
-                df_attributes = pd.read_excel(excel_file, sheet_name='Attribute_values')
-                df_attributes.columns = [c.strip().lower().replace(' ', '_') for c in df_attributes.columns]
-                
-                # Check required columns for Attribute_values
-                expected_attr_cols = {'item_id', 'attribute_name', 'original_value'}
-                if not expected_attr_cols.issubset(df_attributes.columns):
-                    return Response({
-                        'error': 'Missing required columns in Attribute_values sheet',
-                        'required_columns': list(expected_attr_cols),
-                        'found_columns': list(df_attributes.columns)
-                    }, status=status.HTTP_400_BAD_REQUEST)
+                 df_attributes = pd.read_excel(excel_file, sheet_name='Attribute_values')
+                 df_attributes.columns = [c.strip().lower().replace(' ', '_') for c in df_attributes.columns]
+                 
+                 # Check required columns for Attribute_values
+                 expected_attr_cols = {'item_id', 'attribute_name', 'original_value'}
+                 if not expected_attr_cols.issubset(df_attributes.columns):
+                     logger.error("Upload failed: Missing required columns in Attribute_values sheet.")
+                     return Response({
+                          'error': 'Missing required columns in Attribute_values sheet',
+                          'required_columns': list(expected_attr_cols),
+                          'found_columns': list(df_attributes.columns)
+                     }, status=status.HTTP_400_BAD_REQUEST)
 
             # Initialize counters
             products_created = 0
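Note: a sketch of building an upload file that satisfies the sheet and column checks above. The data is illustrative; column headers may use any casing or spaces, since they are normalized with strip().lower().replace(' ', '_') on read:

    import pandas as pd

    products = pd.DataFrame([{
        'item_id': 'SKU-001',
        'product_name': 'Steel Water Bottle',
        'product_long_description': 'Double-walled stainless steel bottle, 750 ml.',
        'product_short_description': 'Insulated 750 ml bottle',
        'product_type': 'Bottle',
        'image_path': 'https://example.com/img/sku-001.jpg',
    }])
    attribute_values = pd.DataFrame([{
        'item_id': 'SKU-001',
        'attribute_name': 'Material',
        'original_value': 'Stainless Steel',
    }])

    # Sheet names must match exactly: 'Products' (required), 'Attribute_values' (optional)
    with pd.ExcelWriter('upload.xlsx') as writer:
        products.to_excel(writer, sheet_name='Products', index=False)
        attribute_values.to_excel(writer, sheet_name='Attribute_values', index=False)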
@@ -667,133 +1275,158 @@ class ProductUploadExcelView(APIView):
 
             # Use transaction to ensure atomicity
             with transaction.atomic():
-                # Process Products sheet
-                for idx, row in df_products.iterrows():
-                    item_id = str(row.get('item_id', '')).strip()
-                    if not item_id:
-                        products_failed += 1
-                        errors.append(f"Products Row {idx + 2}: Missing item_id")
-                        continue
-
-                    try:
-                        defaults = {
-                            'product_name': str(row.get('product_name', '')),
-                            'product_long_description': str(row.get('product_long_description', '')),
-                            'product_short_description': str(row.get('product_short_description', '')),
-                            'product_type': str(row.get('product_type', '')),
-                            'image_path': str(row.get('image_path', '')),
-                        }
-
-                        obj, created = Product.objects.update_or_create(
-                            item_id=item_id,
-                            defaults=defaults
-                        )
-
-                        if created:
-                            products_created += 1
-                        else:
-                            products_updated += 1
-                    except Exception as e:
-                        products_failed += 1
-                        errors.append(f"Products Row {idx + 2} (item_id: {item_id}): {str(e)}")
-
-                # Process Attribute_values sheet if it exists
-                if has_attributes_sheet and df_attributes is not None:
-                    # Group by item_id to optimize lookups
-                    item_ids_in_attrs = df_attributes['item_id'].unique()
-                    
-                    # Fetch all products at once
-                    existing_products = {
-                        p.item_id: p 
-                        for p in Product.objects.filter(item_id__in=item_ids_in_attrs)
-                    }
-
-                    for idx, row in df_attributes.iterrows():
-                        item_id = str(row.get('item_id', '')).strip()
-                        attribute_name = str(row.get('attribute_name', '')).strip()
-                        original_value = str(row.get('original_value', '')).strip()
-
-                        if not item_id or not attribute_name:
-                            attributes_failed += 1
-                            errors.append(
-                                f"Attribute_values Row {idx + 2}: Missing item_id or attribute_name"
-                            )
-                            continue
-
-                        # Check if product exists
-                        product = existing_products.get(item_id)
-                        if not product:
-                            attributes_failed += 1
-                            errors.append(
-                                f"Attribute_values Row {idx + 2}: Product with item_id '{item_id}' not found. "
-                                "Make sure it exists in Products sheet."
-                            )
-                            continue
-
-                        try:
-                            attr_obj, created = ProductAttributeValue.objects.update_or_create(
-                                product=product,
-                                attribute_name=attribute_name,
-                                defaults={'original_value': original_value}
-                            )
-
-                            if created:
-                                attributes_created += 1
-                            else:
-                                attributes_updated += 1
-                        except Exception as e:
-                            attributes_failed += 1
-                            errors.append(
-                                f"Attribute_values Row {idx + 2} "
-                                f"(item_id: {item_id}, attribute: {attribute_name}): {str(e)}"
-                            )
-
-            # Prepare response
+                 # Process Products sheet
+                 for idx, row in df_products.iterrows():
+                     item_id = str(row.get('item_id', '')).strip()
+                     product_type = str(row.get('product_type', '')).strip()
+
+                     if not item_id:
+                         products_failed += 1
+                         errors.append(f"Products Row {idx + 2}: Missing item_id")
+                         continue
+
+                     try:
+                         # Auto-create ProductType if provided and doesn't exist
+                         if product_type:
+                             ProductType.objects.get_or_create(name=product_type)
+
+                         defaults = {
+                             'product_name': str(row.get('product_name', '')),
+                             'product_long_description': str(row.get('product_long_description', '')),
+                             'product_short_description': str(row.get('product_short_description', '')),
+                             'product_type': product_type,
+                             'image_path': str(row.get('image_path', '')),
+                         }
+
+                         obj, created = Product.objects.update_or_create(
+                             item_id=item_id,
+                             defaults=defaults
+                         )
+
+                         if created:
+                             products_created += 1
+                         else:
+                             products_updated += 1
+                     except Exception as e:
+                         products_failed += 1
+                         errors.append(f"Products Row {idx + 2} (item_id: {item_id}): {str(e)}")
+                         logger.error(f"Error processing product {item_id} in Products sheet: {e}")
+
+
+                 # Process Attribute_values sheet if it exists
+                 if has_attributes_sheet and df_attributes is not None:
+                      # Group by item_id to optimize lookups
+                      item_ids_in_attrs = df_attributes['item_id'].astype(str).unique()
+                      
+                      # Fetch all products at once
+                      existing_products = {
+                          p.item_id: p 
+                          for p in Product.objects.filter(item_id__in=item_ids_in_attrs)
+                      }
+
+                      for idx, row in df_attributes.iterrows():
+                          item_id = str(row.get('item_id', '')).strip()
+                          attribute_name = str(row.get('attribute_name', '')).strip()
+                          original_value = str(row.get('original_value', '')).strip()
+
+                          if not item_id or not attribute_name:
+                              attributes_failed += 1
+                              errors.append(
+                                  f"Attribute_values Row {idx + 2}: Missing item_id or attribute_name"
+                              )
+                              continue
+
+                          # Check if product exists
+                          product = existing_products.get(item_id)
+                          if not product:
+                              attributes_failed += 1
+                              errors.append(
+                                  f"Attribute_values Row {idx + 2}: Product with item_id '{item_id}' not found. "
+                                  "Make sure it exists in Products sheet."
+                              )
+                              continue
+
+                          try:
+                              attr_obj, created = ProductAttributeValue.objects.update_or_create(
+                                  product=product,
+                                  attribute_name=attribute_name,
+                                  defaults={'original_value': original_value}
+                              )
+
+                              if created:
+                                  attributes_created += 1
+                              else:
+                                  attributes_updated += 1
+                          except Exception as e:
+                              attributes_failed += 1
+                              errors.append(
+                                  f"Attribute_values Row {idx + 2} "
+                                  f"(item_id: {item_id}, attribute: {attribute_name}): {str(e)}"
+                              )
+                              logger.error(f"Error processing attribute {attribute_name} for product {item_id}: {e}")
+
+            # Prepare response data
             response_data = {
-                'message': 'Upload completed successfully',
+                'message': 'Upload completed',
                 'products': {
                     'created': products_created,
                     'updated': products_updated,
                     'failed': products_failed,
                     'total_processed': products_created + products_updated + products_failed
-                }
+                },
+                'attribute_values': {
+                     'created': attributes_created,
+                     'updated': attributes_updated,
+                     'failed': attributes_failed,
+                     'total_processed': attributes_created + attributes_updated + attributes_failed
+                } if has_attributes_sheet else {'message': 'Attribute_values sheet not found in Excel file'},
+                'generated_excel_status': 'Excel generation started in the background.'
             }
 
-            if has_attributes_sheet:
-                response_data['attribute_values'] = {
-                    'created': attributes_created,
-                    'updated': attributes_updated,
-                    'failed': attributes_failed,
-                    'total_processed': attributes_created + attributes_updated + attributes_failed
-                }
-            else:
-                response_data['attribute_values'] = {
-                    'message': 'Attribute_values sheet not found in Excel file'
-                }
-
             if errors:
-                response_data['errors'] = errors[:50]  # Limit to first 50 errors
+                response_data['errors'] = errors[:50]
                 if len(errors) > 50:
                     response_data['errors'].append(f"... and {len(errors) - 50} more errors")
 
-            # Determine status code
-            if products_failed > 0 or attributes_failed > 0:
-                status_code = status.HTTP_207_MULTI_STATUS
+            # Determine status code for upload
+            upload_status = status.HTTP_201_CREATED if products_failed == 0 and attributes_failed == 0 else status.HTTP_207_MULTI_STATUS
+
+            # Start background thread for Excel generation if upload was successful
+            if products_failed == 0 and attributes_failed == 0:
+                logger.info("API call successful. Triggering background Excel generation thread.")
+                threading.Thread(target=generate_product_excel_background, daemon=True).start()
+                
+                # Update response to provide monitoring paths
+                response_data['generated_excel_status'] = 'Background Excel generation triggered successfully.'
+                response_data['monitoring'] = {
+                     'excel_file': os.path.join(settings.MEDIA_URL, 'generated_products.xlsx'),
+                     'status_file': os.path.join(settings.MEDIA_URL, 'excel_generation_status.json'),
+                     'log_file': os.path.join(settings.MEDIA_URL, 'excel_generation.log'),
+                     'note': 'These files will be available once the background process completes.'
+                }
             else:
-                status_code = status.HTTP_201_CREATED
+                 logger.warning(f"API call finished with errors ({products_failed} products, {attributes_failed} attributes). Not triggering background excel generation.")
+                 response_data['generated_excel_status'] = 'Background Excel generation was NOT triggered due to upload errors. Fix upload errors and re-upload.'
 
-            return Response(response_data, status=status_code)
+
+            return Response(response_data, status=upload_status)
 
         except pd.errors.EmptyDataError:
+            logger.error('The uploaded Excel file is empty or invalid.')
             return Response({
                 'error': 'The uploaded Excel file is empty or invalid'
             }, status=status.HTTP_400_BAD_REQUEST)
         except Exception as e:
+            logger.exception('An unexpected error occurred while processing the file.')
             return Response({
-                'error': f'An error occurred while processing the file: {str(e)}'
+                'error': f'An unexpected error occurred while processing the file: {str(e)}'
             }, status=status.HTTP_500_INTERNAL_SERVER_ERROR)
 
 
+
+
+
+
 # Add this view to your views.py for downloading a template
 
 from django.http import HttpResponse

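Note: a sketch of exercising the upload endpoint end to end. The URL path is an assumption, since the route for ProductUploadExcelView is not shown in this diff:

    import requests

    with open('upload.xlsx', 'rb') as f:
        resp = requests.post(
            'http://localhost:8000/api/products/upload-excel/',  # hypothetical route
            files={'file': ('upload.xlsx', f)},
        )
    print(resp.status_code)               # 201 on a clean upload, 207 if any row failed
    print(resp.json().get('monitoring'))  # paths for the generated Excel, status and log files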
BIN
db.sqlite3


+ 10 - 0
media/excel_generation_status.json

@@ -0,0 +1,10 @@
+{
+    "status": "SUCCESS",
+    "timestamp": "2025-10-27T15:32:50.296699",
+    "products_processed": 15,
+    "products_successful": 15,
+    "products_failed": 0,
+    "excel_path": "/media/generated_products.xlsx",
+    "log_path": "/media/excel_generation.log",
+    "error_message": null
+}
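Note: a sketch of a client polling this status file until a run finishes. One caveat, noted in the comments: the file is only rewritten when a run ends, so immediately after triggering a new upload a client may still read the previous run's result:

    import time
    import requests

    def wait_for_excel(base_url='http://localhost:8000', timeout=600, interval=5):
        """Poll /media/excel_generation_status.json until status is SUCCESS or FAILED."""
        deadline = time.time() + timeout
        while time.time() < deadline:
            resp = requests.get(f'{base_url}/media/excel_generation_status.json')
            # A 404 simply means no run has completed yet; a stale result from an
            # earlier run is possible until the current run overwrites the file.
            data = resp.json() if resp.ok else None
            if data and data.get('status') in ('SUCCESS', 'FAILED'):
                return data
            time.sleep(interval)
        raise TimeoutError('Excel generation did not finish in time')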

BIN
media/generated_products.xlsx


BIN
media/~$generated_products.xlsx