# views.py
  1. # ==================== Cleaned & Optimized Imports ====================
  2. import os
  3. import io
  4. import json
  5. import logging
  6. import threading
  7. from datetime import datetime
  8. import pandas as pd
  9. import concurrent.futures
  10. from django.conf import settings
  11. from django.http import HttpResponse
  12. from django.db import transaction
  13. from django.db.models import Prefetch
  14. from rest_framework.views import APIView
  15. from rest_framework.response import Response
  16. from rest_framework import status
  17. from rest_framework.parsers import MultiPartParser, FormParser
  18. from openpyxl import Workbook
  19. from openpyxl.styles import Font, PatternFill, Alignment
  20. # --- Local imports ---
  21. from .models import (
  22. Product,
  23. ProductType,
  24. ProductAttribute,
  25. ProductAttributeValue,
  26. AttributePossibleValue,
  27. )
  28. from .serializers import (
  29. ProductSerializer,
  30. ProductTypeSerializer,
  31. ProductAttributeSerializer,
  32. AttributePossibleValueSerializer,
  33. SingleProductRequestSerializer,
  34. BatchProductRequestSerializer,
  35. ProductAttributeResultSerializer,
  36. BatchProductResponseSerializer,
  37. ProductAttributeValueSerializer,
  38. ProductAttributeValueInputSerializer,
  39. BulkProductAttributeValueSerializer,
  40. ProductWithAttributesSerializer,
  41. )
  42. from .services import ProductAttributeService
  43. from .ocr_service import OCRService
  44. from .visual_processing_service import VisualProcessingService
  45. # --- Configuration for Generated Outputs Folder ---
  46. OUTPUT_FOLDER_NAME = 'generated_outputs'
  47. OUTPUT_ROOT = os.path.join(settings.MEDIA_ROOT, OUTPUT_FOLDER_NAME)
  48. OUTPUT_URL = os.path.join(settings.MEDIA_URL, OUTPUT_FOLDER_NAME).replace('\\', '/') # Ensure forward slashes
  49. LOG_FILE_NAME = 'excel_generation.log'
  50. STATUS_FILE_NAME = 'excel_generation_status.json'
  51. EXCEL_FILE_NAME = 'generated_products.xlsx'
  52. LOG_FILE_PATH = os.path.join(OUTPUT_ROOT, LOG_FILE_NAME)
  53. STATUS_FILE_PATH = os.path.join(OUTPUT_ROOT, STATUS_FILE_NAME)
  54. EXCEL_FILE_PATH = os.path.join(OUTPUT_ROOT, EXCEL_FILE_NAME)
  55. # Ensure the output folder exists
  56. if not os.path.exists(OUTPUT_ROOT):
  57. os.makedirs(OUTPUT_ROOT)
  58. # Configure logging
  59. logging.basicConfig(
  60. filename=LOG_FILE_PATH,
  61. level=logging.INFO,
  62. format='%(asctime)s - %(levelname)s - %(message)s'
  63. )
  64. logger = logging.getLogger(__name__)
  65. class ExtractProductAttributesView(APIView):
  66. """
  67. API endpoint to extract product attributes for a single product by item_id.
  68. Fetches product details from database with source tracking.
  69. Returns attributes in array format: [{"value": "...", "source": "..."}]
  70. Includes OCR and Visual Processing results.
  71. """
  72. def post(self, request):
  73. serializer = SingleProductRequestSerializer(data=request.data)
  74. if not serializer.is_valid():
  75. return Response({"error": serializer.errors}, status=status.HTTP_400_BAD_REQUEST)
  76. validated_data = serializer.validated_data
  77. item_id = validated_data.get("item_id")
  78. # Fetch product from DB
  79. try:
  80. product = Product.objects.get(item_id=item_id)
  81. except Product.DoesNotExist:
  82. return Response(
  83. {"error": f"Product with item_id '{item_id}' not found."},
  84. status=status.HTTP_404_NOT_FOUND
  85. )
  86. # Extract product details
  87. title = product.product_name
  88. short_desc = product.product_short_description
  89. long_desc = product.product_long_description
  90. image_url = product.image_path
  91. # Process image for OCR if required
  92. ocr_results = None
  93. ocr_text = None
  94. visual_results = None
  95. if validated_data.get("process_image", True) and image_url:
  96. # OCR Processing
  97. ocr_service = OCRService()
  98. ocr_results = ocr_service.process_image(image_url)
  99. if ocr_results and ocr_results.get("detected_text"):
  100. ocr_attrs = ProductAttributeService.extract_attributes_from_ocr(
  101. ocr_results, validated_data.get("model")
  102. )
  103. ocr_results["extracted_attributes"] = ocr_attrs
  104. ocr_text = "\n".join([
  105. f"{item['text']} (confidence: {item['confidence']:.2f})"
  106. for item in ocr_results["detected_text"]
  107. ])
  108. # Visual Processing
  109. visual_service = VisualProcessingService()
  110. product_type_hint = product.product_type if hasattr(product, 'product_type') else None
  111. visual_results = visual_service.process_image(image_url, product_type_hint)
  112. # Combine all product text with source tracking
  113. product_text, source_map = ProductAttributeService.combine_product_text(
  114. title=title,
  115. short_desc=short_desc,
  116. long_desc=long_desc,
  117. ocr_text=ocr_text
  118. )
  119. # Extract attributes with enhanced features and source tracking
  120. result = ProductAttributeService.extract_attributes(
  121. product_text=product_text,
  122. mandatory_attrs=validated_data["mandatory_attrs"],
  123. source_map=source_map,
  124. model=validated_data.get("model"),
  125. extract_additional=validated_data.get("extract_additional", True),
  126. multiple=validated_data.get("multiple", []),
  127. threshold_abs=validated_data.get("threshold_abs", 0.65),
  128. margin=validated_data.get("margin", 0.15),
  129. use_dynamic_thresholds=validated_data.get("use_dynamic_thresholds", True),
  130. use_adaptive_margin=validated_data.get("use_adaptive_margin", True),
  131. use_semantic_clustering=validated_data.get("use_semantic_clustering", True)
  132. )
  133. # Attach OCR results if available
  134. if ocr_results:
  135. result["ocr_results"] = ocr_results
  136. # Attach Visual Processing results if available
  137. if visual_results:
  138. result["visual_results"] = visual_results
  139. response_serializer = ProductAttributeResultSerializer(data=result)
  140. if response_serializer.is_valid():
  141. return Response(response_serializer.data, status=status.HTTP_200_OK)
  142. return Response(result, status=status.HTTP_200_OK)
  143. # ==================== OPTIMIZED BATCH VIEW ====================
  144. class BatchExtractProductAttributesView(APIView):
  145. """
  146. ⚡ PERFORMANCE OPTIMIZED: Batch extraction with intelligent parallelization
  147. Expected performance: 10 products in 30-60 seconds (with image processing)
  148. """
  149. def post(self, request):
  150. import time
  151. start_time = time.time()
  152. serializer = BatchProductRequestSerializer(data=request.data)
  153. if not serializer.is_valid():
  154. return Response({"error": serializer.errors}, status=status.HTTP_400_BAD_REQUEST)
  155. validated_data = serializer.validated_data
  156. product_list = validated_data.get("products", [])
  157. logger.info(f"🚀 Starting batch processing for {len(product_list)} products")
  158. # ==================== OPTIMIZATION 1: Bulk DB Query ====================
  159. item_ids = [p['item_id'] for p in product_list]
  160. products_queryset = Product.objects.filter(
  161. item_id__in=item_ids
  162. ).prefetch_related('attribute_values')
  163. product_map = {product.item_id: product for product in products_queryset}
  164. # Prefetch ALL original attribute values in ONE query
  165. original_values_qs = ProductAttributeValue.objects.filter(
  166. product__item_id__in=item_ids
  167. ).select_related('product')
  168. original_values_map = {}
  169. for attr_val in original_values_qs:
  170. item_id = attr_val.product.item_id
  171. if item_id not in original_values_map:
  172. original_values_map[item_id] = {}
  173. original_values_map[item_id][attr_val.attribute_name] = attr_val.original_value
  174. logger.info(f"✓ Loaded {len(product_map)} products from database")
  175. # Extract settings
  176. model = validated_data.get("model")
  177. extract_additional = validated_data.get("extract_additional", True)
  178. process_image = validated_data.get("process_image", True)
  179. multiple = validated_data.get("multiple", [])
  180. threshold_abs = validated_data.get("threshold_abs", 0.65)
  181. margin = validated_data.get("margin", 0.15)
  182. use_dynamic_thresholds = validated_data.get("use_dynamic_thresholds", True)
  183. use_adaptive_margin = validated_data.get("use_adaptive_margin", True)
  184. use_semantic_clustering = validated_data.get("use_semantic_clustering", True)
  185. results = []
  186. successful = 0
  187. failed = 0
  188. # ==================== OPTIMIZATION 2: Conditional Service Init ====================
  189. # Only initialize if processing images
  190. ocr_service = None
  191. visual_service = None
  192. if process_image:
  193. from .ocr_service import OCRService
  194. from .visual_processing_service import VisualProcessingService
  195. ocr_service = OCRService()
  196. visual_service = VisualProcessingService()
  197. logger.info("✓ Image processing services initialized")
  198. # ==================== OPTIMIZATION 3: Smart Parallelization ====================
  199. def process_single_product(product_entry):
  200. """Process a single product (runs in parallel)"""
  201. import time
  202. product_start = time.time()
  203. item_id = product_entry['item_id']
  204. mandatory_attrs = product_entry['mandatory_attrs']
  205. if item_id not in product_map:
  206. return {
  207. "product_id": item_id,
  208. "error": "Product not found in database"
  209. }, False
  210. product = product_map[item_id]
  211. try:
  212. title = product.product_name
  213. short_desc = product.product_short_description
  214. long_desc = product.product_long_description
  215. image_url = product.image_path
  216. ocr_results = None
  217. ocr_text = None
  218. visual_results = None
  219. # ⚡ SKIP IMAGE PROCESSING IF DISABLED (HUGE TIME SAVER)
  220. if process_image and image_url:
  221. if ocr_service:
  222. ocr_results = ocr_service.process_image(image_url)
  223. if ocr_results and ocr_results.get("detected_text"):
  224. ocr_attrs = ProductAttributeService.extract_attributes_from_ocr(
  225. ocr_results, model
  226. )
  227. ocr_results["extracted_attributes"] = ocr_attrs
  228. ocr_text = "\n".join([
  229. f"{item['text']} (confidence: {item['confidence']:.2f})"
  230. for item in ocr_results["detected_text"]
  231. ])
  232. if visual_service:
  233. product_type_hint = product.product_type if hasattr(product, 'product_type') else None
  234. visual_results = visual_service.process_image(image_url, product_type_hint)
  235. if visual_results and visual_results.get('visual_attributes'):
  236. visual_results['visual_attributes'] = ProductAttributeService.format_visual_attributes(
  237. visual_results['visual_attributes']
  238. )
  239. # Combine product text with source tracking
  240. product_text, source_map = ProductAttributeService.combine_product_text(
  241. title=title,
  242. short_desc=short_desc,
  243. long_desc=long_desc,
  244. ocr_text=ocr_text
  245. )
  246. # ⚡ EXTRACT ATTRIBUTES WITH CACHING ENABLED
  247. extracted = ProductAttributeService.extract_attributes(
  248. product_text=product_text,
  249. mandatory_attrs=mandatory_attrs,
  250. source_map=source_map,
  251. model=model,
  252. extract_additional=extract_additional,
  253. multiple=multiple,
  254. threshold_abs=threshold_abs,
  255. margin=margin,
  256. use_dynamic_thresholds=use_dynamic_thresholds,
  257. use_adaptive_margin=use_adaptive_margin,
  258. use_semantic_clustering=use_semantic_clustering,
  259. use_cache=True # ⚡ CRITICAL: Enable caching
  260. )
  261. # Add original values
  262. original_attrs = original_values_map.get(item_id, {})
  263. for attr_name, attr_values in extracted.get("mandatory", {}).items():
  264. if isinstance(attr_values, list):
  265. for attr_obj in attr_values:
  266. if isinstance(attr_obj, dict):
  267. attr_obj["original_value"] = original_attrs.get(attr_name, "")
  268. for attr_name, attr_values in extracted.get("additional", {}).items():
  269. if isinstance(attr_values, list):
  270. for attr_obj in attr_values:
  271. if isinstance(attr_obj, dict):
  272. attr_obj["original_value"] = original_attrs.get(attr_name, "")
  273. result = {
  274. "product_id": product.item_id,
  275. "mandatory": extracted.get("mandatory", {}),
  276. "additional": extracted.get("additional", {}),
  277. }
  278. if ocr_results:
  279. result["ocr_results"] = ocr_results
  280. if visual_results:
  281. result["visual_results"] = visual_results
  282. processing_time = time.time() - product_start
  283. logger.info(f"✓ Processed {item_id} in {processing_time:.2f}s")
  284. return result, True
  285. except Exception as e:
  286. logger.error(f"❌ Error processing {item_id}: {str(e)}")
  287. return {
  288. "product_id": item_id,
  289. "error": str(e)
  290. }, False
  291. # ==================== OPTIMIZATION 4: Parallel Execution ====================
  292. # Adjust workers based on whether image processing is enabled
  293. max_workers = min(3 if process_image else 10, len(product_list))
  294. logger.info(f"⚡ Using {max_workers} parallel workers")
  295. with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
  296. # Submit all tasks
  297. future_to_product = {
  298. executor.submit(process_single_product, product): product
  299. for product in product_list
  300. }
  301. # Collect results as they complete
  302. for future in concurrent.futures.as_completed(future_to_product):
  303. try:
  304. result, success = future.result()
  305. results.append(result)
  306. if success:
  307. successful += 1
  308. else:
  309. failed += 1
  310. except Exception as e:
  311. failed += 1
  312. logger.error(f"❌ Future execution error: {str(e)}")
  313. results.append({
  314. "product_id": "unknown",
  315. "error": str(e)
  316. })
  317. total_time = time.time() - start_time
  318. # Get cache statistics
  319. cache_stats = ProductAttributeService.get_cache_stats()
  320. logger.info(f"""
  321. 🎉 BATCH PROCESSING COMPLETE
  322. ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
  323. Total products: {len(product_list)}
  324. Successful: {successful}
  325. Failed: {failed}
  326. Total time: {total_time:.2f}s
  327. Avg time/product: {total_time/len(product_list):.2f}s
  328. Cache hit rate: {cache_stats['embedding_cache']['hit_rate_percent']:.1f}%
  329. ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
  330. """)
  331. batch_result = {
  332. "results": results,
  333. "total_products": len(product_list),
  334. "successful": successful,
  335. "failed": failed,
  336. "performance": {
  337. "total_time_seconds": round(total_time, 2),
  338. "avg_time_per_product": round(total_time / len(product_list), 2),
  339. "workers_used": max_workers
  340. },
  341. "cache_stats": cache_stats
  342. }
  343. response_serializer = BatchProductResponseSerializer(data=batch_result)
  344. if response_serializer.is_valid():
  345. return Response(response_serializer.data, status=status.HTTP_200_OK)
  346. return Response(batch_result, status=status.HTTP_200_OK)
  347. class ProductListView(APIView):
  348. """
  349. GET API to list all products with details
  350. """
  351. def get(self, request):
  352. products = Product.objects.all()
  353. serializer = ProductSerializer(products, many=True)
  354. return Response(serializer.data, status=status.HTTP_200_OK)
  355. # -------------------------------------------------------------------------------------------------
  356. def generate_product_excel_background():
  357. """
  358. Function to perform batch attribute extraction for all products and generate an Excel file.
  359. Runs in a background thread to avoid blocking the API response.
  360. Logs success/failure and saves a status file for external monitoring.
  361. """
  362. logger.info(f"[{datetime.now().isoformat()}] Starting background product Excel generation and attribute extraction.")
  363. successful = 0
  364. failed = 0
  365. results = [] # To store detailed extraction results for Excel sheet 2
  366. # Function to write status file (SUCCESS/FAILED)
  367. def write_status(status_type, error_msg=None):
  368. status_data = {
  369. "status": status_type,
  370. "timestamp": datetime.now().isoformat(),
  371. "products_processed": successful + failed,
  372. "products_successful": successful,
  373. "products_failed": failed,
  374. # FIX: Use the updated EXCEL_FILE_NAME and OUTPUT_URL
  375. "excel_path": os.path.join(OUTPUT_URL, EXCEL_FILE_NAME) if status_type == "SUCCESS" else None,
  376. "log_path": os.path.join(OUTPUT_URL, LOG_FILE_NAME),
  377. "error_message": error_msg
  378. }
  379. try:
  380. # FIX: STATUS_FILE_PATH is now inside generated_outputs
  381. with open(STATUS_FILE_PATH, 'w') as f:
  382. json.dump(status_data, f, indent=4)
  383. except Exception as e:
  384. logger.exception(f"CRITICAL ERROR: Failed to write status file at {STATUS_FILE_PATH}: {e}")
  385. try:
  386. # 1. PREFETCH all necessary related data to minimize database queries
  387. # Prefetch possible values for mandatory attributes
  388. possible_values_prefetch = Prefetch(
  389. 'attributes',
  390. queryset=ProductAttribute.objects.filter(is_mandatory=True).prefetch_related('possible_values')
  391. )
  392. # Fetch all ProductTypes with their mandatory attributes and possible values
  393. all_product_types = ProductType.objects.prefetch_related(possible_values_prefetch)
  394. product_type_map = {
  395. pt.name: pt for pt in all_product_types
  396. }
  397. # Prepare product_list for batch extraction
  398. all_products = Product.objects.all()
  399. product_list = []
  400. for p in all_products:
  401. mandatory_attrs_dict = {}
  402. product_type_name = p.product_type.strip() if p.product_type else None
  403. if product_type_name and product_type_name in product_type_map:
  404. pt = product_type_map[product_type_name]
  405. # Build the mandatory_attrs dictionary: { "Attribute Name": ["Value 1", "Value 2"], ... }
  406. for attr in pt.attributes.all():
  407. mandatory_attrs_dict[attr.name] = [
  408. pv.value for pv in attr.possible_values.all()
  409. ]
  410. product_list.append({
  411. "item_id": p.item_id,
  412. "product_type_name": product_type_name,
  413. "mandatory_attrs": mandatory_attrs_dict
  414. })
  415. # Batch settings (using defaults)
  416. model = "llama-3.1-8b-instant"
  417. extract_additional = True
  418. process_image = False
  419. multiple = []
  420. threshold_abs = 0.65
  421. margin = 0.15
  422. use_dynamic_thresholds = True
  423. use_adaptive_margin = True
  424. use_semantic_clustering = True
  425. # Batch extraction logic
  426. item_ids = [p['item_id'] for p in product_list]
  427. products_queryset = Product.objects.filter(item_id__in=item_ids)
  428. product_map = {product.item_id: product for product in products_queryset}
  429. found_ids = set(product_map.keys())
  430. for product_entry in product_list:
  431. item_id = product_entry['item_id']
  432. mandatory_attrs = product_entry['mandatory_attrs']
  433. if item_id not in found_ids:
  434. failed += 1
  435. results.append({
  436. "product_id": item_id,
  437. "error": "Product not found in database"
  438. })
  439. logger.warning(f"Product {item_id} not found in database. Skipping extraction.")
  440. continue
  441. product = product_map[item_id]
  442. try:
  443. title = product.product_name
  444. short_desc = product.product_short_description
  445. long_desc = product.product_long_description
  446. image_url = product.image_path
  447. ocr_results = None
  448. ocr_text = None
  449. visual_results = None
  450. if process_image and image_url:
  451. logger.info(f"Processing image for product {item_id}...")
  452. # OCR Processing
  453. ocr_service = OCRService()
  454. ocr_results = ocr_service.process_image(image_url)
  455. if ocr_results and ocr_results.get("detected_text"):
  456. ocr_attrs = ProductAttributeService.extract_attributes_from_ocr(
  457. ocr_results, model
  458. )
  459. ocr_results["extracted_attributes"] = ocr_attrs
  460. ocr_text = "\n".join([
  461. f"{item['text']} (confidence: {item['confidence']:.2f})"
  462. for item in ocr_results["detected_text"]
  463. ])
  464. # Visual Processing
  465. visual_service = VisualProcessingService()
  466. product_type_hint = product.product_type if product.product_type else None
  467. visual_results = visual_service.process_image(image_url, product_type_hint)
  468. if visual_results and visual_results.get('visual_attributes'):
  469. visual_results['visual_attributes'] = ProductAttributeService.format_visual_attributes(
  470. visual_results['visual_attributes']
  471. )
  472. logger.info(f"Image processing done for product {item_id}.")
  473. # Combine product text with source tracking
  474. product_text, source_map = ProductAttributeService.combine_product_text(
  475. title=title,
  476. short_desc=short_desc,
  477. long_desc=long_desc,
  478. ocr_text=ocr_text
  479. )
  480. # Attribute Extraction with source tracking
  481. extracted = ProductAttributeService.extract_attributes(
  482. product_text=product_text,
  483. mandatory_attrs=mandatory_attrs,
  484. source_map=source_map,
  485. model=model,
  486. extract_additional=extract_additional,
  487. multiple=multiple,
  488. threshold_abs=threshold_abs,
  489. margin=margin,
  490. use_dynamic_thresholds=use_dynamic_thresholds,
  491. use_adaptive_margin=use_adaptive_margin,
  492. use_semantic_clustering=use_semantic_clustering
  493. )
  494. result = {
  495. "product_id": item_id,
  496. "mandatory": extracted.get("mandatory", {}),
  497. "additional": extracted.get("additional", {}),
  498. }
  499. if ocr_results:
  500. result["ocr_results"] = ocr_results
  501. if visual_results:
  502. result["visual_results"] = visual_results
  503. results.append(result)
  504. successful += 1
  505. logger.info(f"Attribute extraction successful for product {item_id}.")
  506. except Exception as e:
  507. failed += 1
  508. results.append({
  509. "product_id": item_id,
  510. "error": str(e)
  511. })
  512. logger.exception(f"Error during attribute extraction for product {item_id}.")
  513. logger.info(f"Batch extraction phase complete. Successful: {successful}, Failed: {failed}")
  514. # --------------------------------------------------------------------------------
  515. # Generate and save the Excel file
  516. # --------------------------------------------------------------------------------
  517. wb = Workbook()
  518. # Sheet 1: Products (from DB) (Logic is the same, skipped for brevity)
  519. ws_products = wb.active
  520. ws_products.title = "Products"
  521. products_headers = ['ITEM ID', 'PRODUCT NAME', 'PRODUCT TYPE', 'Product Short Description', 'Product Long Description', 'image_path']
  522. header_fill = PatternFill(start_color="366092", end_color="366092", fill_type="solid")
  523. header_font = Font(bold=True, color="FFFFFF")
  524. for col_num, header in enumerate(products_headers, 1):
  525. cell = ws_products.cell(row=1, column=col_num)
  526. cell.value = header
  527. cell.fill = header_fill
  528. cell.font = header_font
  529. cell.alignment = Alignment(horizontal="center", vertical="center")
  530. all_products_db = Product.objects.all()
  531. for row_num, product in enumerate(all_products_db, 2):
  532. ws_products.cell(row=row_num, column=1, value=product.item_id)
  533. ws_products.cell(row=row_num, column=2, value=product.product_name)
  534. ws_products.cell(row=row_num, column=3, value=product.product_type)
  535. ws_products.cell(row=row_num, column=4, value=product.product_short_description)
  536. ws_products.cell(row=row_num, column=5, value=product.product_long_description)
  537. ws_products.cell(row=row_num, column=6, value=product.image_path)
  538. for col_dim, width in zip(['A', 'B', 'C', 'D', 'E', 'F'], [15, 25, 15, 35, 50, 45]):
  539. ws_products.column_dimensions[col_dim].width = width
  540. # Sheet 2: Attribute_values (Logic is the same, skipped for brevity)
  541. ws_attributes = wb.create_sheet("Attribute_values")
  542. attributes_headers = ['item_id', 'attribute_name', 'original_value', 'generated_value']
  543. for col_num, header in enumerate(attributes_headers, 1):
  544. cell = ws_attributes.cell(row=1, column=col_num)
  545. cell.value = header
  546. cell.fill = header_fill
  547. cell.font = header_font
  548. cell.alignment = Alignment(horizontal="center", vertical="center")
  549. row_num = 2
  550. all_original_attrs = ProductAttributeValue.objects.all()
  551. original_attrs_lookup = {
  552. (attr.product.item_id, attr.attribute_name): attr.original_value
  553. for attr in all_original_attrs
  554. }
  555. processed_original_keys = set()
  556. for res in results:
  557. # ... (Excel writing logic for generated/original attributes remains unchanged)
  558. item_id = res["product_id"]
  559. if "error" in res:
  560. for (orig_item_id, orig_attr_name), orig_value in original_attrs_lookup.items():
  561. if orig_item_id == item_id:
  562. ws_attributes.cell(row=row_num, column=1, value=orig_item_id)
  563. ws_attributes.cell(row=row_num, column=2, value=orig_attr_name)
  564. ws_attributes.cell(row=row_num, column=3, value=orig_value)
  565. ws_attributes.cell(row=row_num, column=4, value=f"Extraction Failed: {res['error']}")
  566. processed_original_keys.add((orig_item_id, orig_attr_name))
  567. row_num += 1
  568. continue
  569. generated_attrs = {}
  570. for cat in ["mandatory", "additional"]:
  571. attrs = res.get(cat, {})
  572. for attr_name, values in attrs.items():
  573. for val in values:
  574. key = (item_id, attr_name)
  575. if key not in generated_attrs:
  576. generated_attrs[key] = []
  577. generated_attrs[key].append(f"{val['value']} (source: {val['source']})")
  578. ocr = res.get("ocr_results")
  579. if ocr and "extracted_attributes" in ocr and isinstance(ocr["extracted_attributes"], dict):
  580. for attr_name, values in ocr["extracted_attributes"].items():
  581. for val in values:
  582. key = (item_id, attr_name)
  583. if key not in generated_attrs:
  584. generated_attrs[key] = []
  585. generated_attrs[key].append(f"{val['value']} (source: {val['source']})")
  586. visual = res.get("visual_results")
  587. if visual and "visual_attributes" in visual:
  588. vis_attrs = visual["visual_attributes"]
  589. if isinstance(vis_attrs, dict):
  590. for attr_name, values in vis_attrs.items():
  591. if not isinstance(values, list):
  592. values = [{"value": values, "source": "visual"}]
  593. for val in values:
  594. key = (item_id, attr_name)
  595. if key not in generated_attrs:
  596. generated_attrs[key] = []
  597. generated_attrs[key].append(f"{val['value']} (source: {val.get('source', 'visual')})")
  598. elif isinstance(vis_attrs, list):
  599. for item in vis_attrs:
  600. attr_name = item.get("attribute_name") or item.get("name")
  601. if not attr_name: continue
  602. value = item.get("value", "")
  603. source = item.get("source", "visual")
  604. key = (item_id, attr_name)
  605. if key not in generated_attrs:
  606. generated_attrs[key] = []
  607. generated_attrs[key].append(f"{value} (source: {source})")
  608. for (attr_item_id, attr_name), gen_values in generated_attrs.items():
  609. original_value = original_attrs_lookup.get((attr_item_id, attr_name), "")
  610. generated_value = "; ".join(gen_values) if gen_values else ""
  611. ws_attributes.cell(row=row_num, column=1, value=attr_item_id)
  612. ws_attributes.cell(row=row_num, column=2, value=attr_name)
  613. ws_attributes.cell(row=row_num, column=3, value=original_value)
  614. ws_attributes.cell(row=row_num, column=4, value=generated_value)
  615. processed_original_keys.add((attr_item_id, attr_name))
  616. row_num += 1
  617. for (orig_item_id, orig_attr_name), orig_value in original_attrs_lookup.items():
  618. if orig_item_id == item_id and (orig_item_id, orig_attr_name) not in processed_original_keys:
  619. ws_attributes.cell(row=row_num, column=1, value=orig_item_id)
  620. ws_attributes.cell(row=row_num, column=2, value=orig_attr_name)
  621. ws_attributes.cell(row=row_num, column=3, value=orig_value)
  622. ws_attributes.cell(row=row_num, column=4, value="")
  623. processed_original_keys.add((orig_item_id, orig_attr_name))
  624. row_num += 1
  625. for (orig_item_id, orig_attr_name), orig_value in original_attrs_lookup.items():
  626. if (orig_item_id, orig_attr_name) not in processed_original_keys:
  627. ws_attributes.cell(row=row_num, column=1, value=orig_item_id)
  628. ws_attributes.cell(row=row_num, column=2, value=orig_attr_name)
  629. ws_attributes.cell(row=row_num, column=3, value=orig_value)
  630. ws_attributes.cell(row=row_num, column=4, value="Original value only (Product not processed in batch)")
  631. row_num += 1
  632. for col_dim, width in zip(['A', 'B', 'C', 'D'], [15, 35, 50, 50]):
  633. ws_attributes.column_dimensions[col_dim].width = width
  634. # FIX: Save to the new EXCEL_FILE_PATH
  635. wb.save(EXCEL_FILE_PATH)
  636. logger.info(f"Excel file successfully saved to {EXCEL_FILE_PATH}")
  637. # Write SUCCESS status
  638. write_status("SUCCESS")
  639. logger.info("Background task finished successfully.")
  640. except Exception as e:
  641. # Log the critical error and write FAILED status
  642. logger.exception("CRITICAL ERROR during background Excel generation process.")
  643. write_status("FAILED", error_msg=str(e))
  644. # -------------------------------------------------------------------------------------------------
class ProductUploadExcelView(APIView):
    """
    POST API to upload an Excel file.

    Expects multipart form data with a 'file' field holding a workbook that
    contains a 'Products' sheet (required) and an 'Attribute_values' sheet
    (optional).  Rows are upserted inside a single transaction; when every
    row succeeds, a daemon thread is started to regenerate the output Excel
    report.  Returns 201 on a clean upload, 207 when some rows failed, 400
    for structural problems (missing sheet/columns, empty file) and 500 for
    unexpected errors.
    """
    parser_classes = (MultiPartParser, FormParser)

    def post(self, request, *args, **kwargs):
        # Reject the request outright when no file part was sent.
        file_obj = request.FILES.get('file')
        if not file_obj:
            return Response({'error': 'No file provided'}, status=status.HTTP_400_BAD_REQUEST)
        try:
            # ... (Upload and DB processing logic remains unchanged)
            # Read all sheets from Excel file
            excel_file = pd.ExcelFile(file_obj)
            # Check if required sheets exist
            if 'Products' not in excel_file.sheet_names:
                logger.error(f"Upload failed: Missing 'Products' sheet in file.")
                return Response({
                    'error': "Missing 'Products' sheet",
                    'available_sheets': excel_file.sheet_names
                }, status=status.HTTP_400_BAD_REQUEST)
            df_products = pd.read_excel(excel_file, sheet_name='Products')
            # Normalise headers (trim, lowercase, spaces -> underscores) so
            # 'ITEM ID' and 'item_id' both match the expected column names.
            df_products.columns = [c.strip().lower().replace(' ', '_') for c in df_products.columns]
            expected_product_cols = {
                'item_id', 'product_name', 'product_long_description',
                'product_short_description', 'product_type', 'image_path'
            }
            if not expected_product_cols.issubset(df_products.columns):
                logger.error(f"Upload failed: Missing required columns in Products sheet.")
                return Response({
                    'error': 'Missing required columns in Products sheet',
                    'required_columns': list(expected_product_cols),
                    'found_columns': list(df_products.columns)
                }, status=status.HTTP_400_BAD_REQUEST)
            df_attributes = None
            has_attributes_sheet = 'Attribute_values' in excel_file.sheet_names
            if has_attributes_sheet:
                df_attributes = pd.read_excel(excel_file, sheet_name='Attribute_values')
                df_attributes.columns = [c.strip().lower().replace(' ', '_') for c in df_attributes.columns]
                expected_attr_cols = {'item_id', 'attribute_name', 'original_value'}
                if not expected_attr_cols.issubset(df_attributes.columns):
                    logger.error(f"Upload failed: Missing required columns in Attribute_values sheet.")
                    return Response({
                        'error': 'Missing required columns in Attribute_values sheet',
                        'required_columns': list(expected_attr_cols),
                        'found_columns': list(df_attributes.columns)
                    }, status=status.HTTP_400_BAD_REQUEST)
            # Per-sheet success/failure counters for the response payload.
            products_created = 0
            products_updated = 0
            attributes_created = 0
            attributes_updated = 0
            products_failed = 0
            attributes_failed = 0
            errors = []
            # All DB writes are atomic: an unexpected exception rolls back the
            # whole upload rather than leaving it half-applied.
            with transaction.atomic():
                for idx, row in df_products.iterrows():
                    # NOTE(review): str() on a missing/NaN cell yields the
                    # literal string 'nan', which is stored verbatim in the
                    # defaults below — confirm this is intended.
                    item_id = str(row.get('item_id', '')).strip()
                    product_type = str(row.get('product_type', '')).strip()
                    if not item_id:
                        products_failed += 1
                        # idx + 2 converts the 0-based frame index to the
                        # Excel row number (1-based plus the header row).
                        errors.append(f"Products Row {idx + 2}: Missing item_id")
                        continue
                    try:
                        if product_type:
                            ProductType.objects.get_or_create(name=product_type)
                        defaults = {
                            'product_name': str(row.get('product_name', '')),
                            'product_long_description': str(row.get('product_long_description', '')),
                            'product_short_description': str(row.get('product_short_description', '')),
                            'product_type': product_type,
                            'image_path': str(row.get('image_path', '')),
                        }
                        obj, created = Product.objects.update_or_create(item_id=item_id, defaults=defaults)
                        if created: products_created += 1
                        else: products_updated += 1
                    except Exception as e:
                        products_failed += 1
                        errors.append(f"Products Row {idx + 2} (item_id: {item_id}): {str(e)}")
                        logger.error(f"Error processing product {item_id} in Products sheet: {e}")
                if has_attributes_sheet and df_attributes is not None:
                    # Prefetch every referenced product in one query to avoid
                    # a per-row DB lookup inside the loop below.
                    item_ids_in_attrs = df_attributes['item_id'].astype(str).unique()
                    existing_products = {p.item_id: p for p in Product.objects.filter(item_id__in=item_ids_in_attrs)}
                    for idx, row in df_attributes.iterrows():
                        item_id = str(row.get('item_id', '')).strip()
                        attribute_name = str(row.get('attribute_name', '')).strip()
                        original_value = str(row.get('original_value', '')).strip()
                        if not item_id or not attribute_name:
                            attributes_failed += 1
                            errors.append(f"Attribute_values Row {idx + 2}: Missing item_id or attribute_name")
                            continue
                        product = existing_products.get(item_id)
                        if not product:
                            attributes_failed += 1
                            errors.append(f"Attribute_values Row {idx + 2}: Product with item_id '{item_id}' not found. Make sure it exists in Products sheet.")
                            continue
                        try:
                            attr_obj, created = ProductAttributeValue.objects.update_or_create(
                                product=product,
                                attribute_name=attribute_name,
                                defaults={'original_value': original_value}
                            )
                            if created: attributes_created += 1
                            else: attributes_updated += 1
                        except Exception as e:
                            attributes_failed += 1
                            errors.append(f"Attribute_values Row {idx + 2} (item_id: {item_id}, attribute: {attribute_name}): {str(e)}")
                            logger.error(f"Error processing attribute {attribute_name} for product {item_id}: {e}")
            # Prepare response data
            response_data = {
                'message': 'Upload completed',
                'products': {
                    'created': products_created, 'updated': products_updated, 'failed': products_failed,
                    'total_processed': products_created + products_updated + products_failed
                },
                'attribute_values': {
                    'created': attributes_created, 'updated': attributes_updated, 'failed': attributes_failed,
                    'total_processed': attributes_created + attributes_updated + attributes_failed
                } if has_attributes_sheet else {'message': 'Attribute_values sheet not found in Excel file'},
                'generated_excel_status': 'Excel generation started in the background.'
            }
            if errors:
                # Cap the error list at 50 entries to keep the payload small.
                response_data['errors'] = errors[:50]
                if len(errors) > 50:
                    response_data['errors'].append(f"... and {len(errors) - 50} more errors")
            upload_status = status.HTTP_201_CREATED if products_failed == 0 and attributes_failed == 0 else status.HTTP_207_MULTI_STATUS
            # Start background thread for Excel generation if upload was successful
            if products_failed == 0 and attributes_failed == 0:
                logger.info("API call successful. Triggering background Excel generation thread.")
                threading.Thread(target=generate_product_excel_background, daemon=True).start()
                # FIX: Update monitoring URLs to point to the new generated_outputs subfolder
                # NOTE(review): os.path.join builds these URLs, which would use
                # backslashes on Windows hosts — confirm the deployment target.
                response_data['generated_excel_status'] = 'Background Excel generation triggered successfully.'
                response_data['monitoring'] = {
                    'excel_file': os.path.join(OUTPUT_URL, EXCEL_FILE_NAME),
                    'status_file': os.path.join(OUTPUT_URL, STATUS_FILE_NAME),
                    'log_file': os.path.join(OUTPUT_URL, LOG_FILE_NAME),
                    'note': 'These files will be available once the background process completes.'
                }
            else:
                logger.warning(f"API call finished with errors ({products_failed} products, {attributes_failed} attributes). Not triggering background excel generation.")
                response_data['generated_excel_status'] = 'Background Excel generation was NOT triggered due to upload errors. Fix upload errors and re-upload.'
            return Response(response_data, status=upload_status)
        except pd.errors.EmptyDataError:
            logger.error('The uploaded Excel file is empty or invalid.')
            return Response({'error': 'The uploaded Excel file is empty or invalid'}, status=status.HTTP_400_BAD_REQUEST)
        except Exception as e:
            logger.exception(f'An unexpected error occurred while processing the file.')
            return Response({'error': f'An unexpected error occurred while processing the file: {str(e)}'}, status=status.HTTP_500_INTERNAL_SERVER_ERROR)
  791. class DownloadExcelTemplateView(APIView):
  792. """
  793. GET API to download an Excel template with two sheets:
  794. 1. Products sheet with sample data
  795. 2. Attribute_values sheet with sample data
  796. """
  797. def get(self, request):
  798. # Create a new workbook
  799. wb = Workbook()
  800. # Remove default sheet
  801. if 'Sheet' in wb.sheetnames:
  802. wb.remove(wb['Sheet'])
  803. # ===== Create Products Sheet =====
  804. ws_products = wb.create_sheet("Products", 0)
  805. # Define headers for Products
  806. products_headers = [
  807. 'ITEM ID',
  808. 'PRODUCT NAME',
  809. 'PRODUCT TYPE',
  810. 'Product Short Description',
  811. 'Product Long Description',
  812. 'image_path'
  813. ]
  814. # Style for headers
  815. header_fill = PatternFill(start_color="366092", end_color="366092", fill_type="solid")
  816. header_font = Font(bold=True, color="FFFFFF")
  817. # Add headers to Products sheet
  818. for col_num, header in enumerate(products_headers, 1):
  819. cell = ws_products.cell(row=1, column=col_num)
  820. cell.value = header
  821. cell.fill = header_fill
  822. cell.font = header_font
  823. cell.alignment = Alignment(horizontal="center", vertical="center")
  824. # Add sample data to Products sheet
  825. sample_products = [
  826. [
  827. '3217373735',
  828. 'Blue V-Neck T-Shirt',
  829. 'Clothing',
  830. 'Stylish blue t-shirt with v-neck design',
  831. 'Premium quality cotton t-shirt featuring a classic v-neck design. Perfect for casual wear. Available in vibrant blue color.',
  832. 'https://images.unsplash.com/photo-1521572163474-6864f9cf17ab'
  833. ],
  834. [
  835. '1234567890',
  836. 'Red Cotton Dress',
  837. 'Clothing',
  838. 'Beautiful red dress for special occasions',
  839. 'Elegant red dress made from 100% cotton fabric. Features a flowing design perfect for summer events and parties.',
  840. 'https://images.unsplash.com/photo-1595777457583-95e059d581b8'
  841. ],
  842. [
  843. '9876543210',
  844. 'Steel Screws Pack',
  845. 'Hardware',
  846. 'Pack of zinc plated steel screws',
  847. 'Professional grade steel screws with zinc plating for corrosion resistance. Pack contains 50 pieces, 2 inch length, M6 thread size.',
  848. 'https://images.unsplash.com/photo-1542272604-787c3835535d'
  849. ]
  850. ]
  851. for row_num, row_data in enumerate(sample_products, 2):
  852. for col_num, value in enumerate(row_data, 1):
  853. ws_products.cell(row=row_num, column=col_num, value=value)
  854. # Adjust column widths for Products sheet
  855. ws_products.column_dimensions['A'].width = 15 # ITEM ID
  856. ws_products.column_dimensions['B'].width = 25 # PRODUCT NAME
  857. ws_products.column_dimensions['C'].width = 15 # PRODUCT TYPE
  858. ws_products.column_dimensions['D'].width = 35 # Short Description
  859. ws_products.column_dimensions['E'].width = 50 # Long Description
  860. ws_products.column_dimensions['F'].width = 45 # image_path
  861. # ===== Create Attribute_values Sheet =====
  862. ws_attributes = wb.create_sheet("Attribute_values", 1)
  863. # Define headers for Attribute_values
  864. attributes_headers = ['item_id', 'attribute_name', 'original_value']
  865. # Add headers to Attribute_values sheet
  866. for col_num, header in enumerate(attributes_headers, 1):
  867. cell = ws_attributes.cell(row=1, column=col_num)
  868. cell.value = header
  869. cell.fill = header_fill
  870. cell.font = header_font
  871. cell.alignment = Alignment(horizontal="center", vertical="center")
  872. # Add sample data to Attribute_values sheet
  873. sample_attributes = [
  874. ['3217373735', 'Clothing Neck Style', 'V-Neck Square'],
  875. ['3217373735', 'Condition', 'New with tags'],
  876. ['3217373735', 'Material', '100% Cotton'],
  877. ['3217373735', 'Color', 'Sky Blue'],
  878. ['3217373735', 'Size', 'Medium'],
  879. ['1234567890', 'Sleeve Length', 'Sleeveless'],
  880. ['1234567890', 'Condition', 'Brand New'],
  881. ['1234567890', 'Pattern', 'Solid'],
  882. ['1234567890', 'Material', 'Cotton Blend'],
  883. ['1234567890', 'Color', 'Crimson Red'],
  884. ['9876543210', 'Material', 'Stainless Steel'],
  885. ['9876543210', 'Thread Size', 'M6'],
  886. ['9876543210', 'Length', '2 inches'],
  887. ['9876543210', 'Coating', 'Zinc Plated'],
  888. ['9876543210', 'Package Quantity', '50 pieces'],
  889. ]
  890. for row_num, row_data in enumerate(sample_attributes, 2):
  891. for col_num, value in enumerate(row_data, 1):
  892. ws_attributes.cell(row=row_num, column=col_num, value=value)
  893. # Adjust column widths for Attribute_values sheet
  894. ws_attributes.column_dimensions['A'].width = 15 # item_id
  895. ws_attributes.column_dimensions['B'].width = 25 # attribute_name
  896. ws_attributes.column_dimensions['C'].width = 30 # original_value
  897. # Add instructions sheet
  898. ws_instructions = wb.create_sheet("Instructions", 2)
  899. instructions_text = [
  900. ['Excel Upload Instructions', ''],
  901. ['', ''],
  902. ['Sheet 1: Products', ''],
  903. ['- Contains product basic information', ''],
  904. ['- All columns are required', ''],
  905. ['- ITEM ID must be unique', ''],
  906. ['', ''],
  907. ['Sheet 2: Attribute_values', ''],
  908. ['- Contains original/manual attribute values', ''],
  909. ['- item_id must match an ITEM ID from Products sheet', ''],
  910. ['- Multiple rows can have the same item_id (for different attributes)', ''],
  911. ['- Each attribute per product should be on a separate row', ''],
  912. ['', ''],
  913. ['Upload Process:', ''],
  914. ['1. Fill in your product data in the Products sheet', ''],
  915. ['2. Fill in attribute values in the Attribute_values sheet', ''],
  916. ['3. Ensure item_id values match between both sheets', ''],
  917. ['4. Save the file and upload via API', ''],
  918. ['', ''],
  919. ['Notes:', ''],
  920. ['- Do not change sheet names (must be "Products" and "Attribute_values")', ''],
  921. ['- Do not change column header names', ''],
  922. ['- You can delete the sample data rows', ''],
  923. ['- You can delete this Instructions sheet before uploading', ''],
  924. ]
  925. for row_num, row_data in enumerate(instructions_text, 1):
  926. ws_instructions.cell(row=row_num, column=1, value=row_data[0])
  927. if row_num == 1:
  928. cell = ws_instructions.cell(row=row_num, column=1)
  929. cell.font = Font(bold=True, size=14)
  930. ws_instructions.column_dimensions['A'].width = 60
  931. # Save to BytesIO
  932. output = io.BytesIO()
  933. wb.save(output)
  934. output.seek(0)
  935. # Create response
  936. response = HttpResponse(
  937. output.getvalue(),
  938. content_type='application/vnd.openxmlformats-officedocument.spreadsheetml.sheet'
  939. )
  940. response['Content-Disposition'] = 'attachment; filename=product_upload_template.xlsx'
  941. return response
  942. class DownloadProductsWithAttributesExcelView(APIView):
  943. """
  944. GET API to download existing products with their attribute values as Excel.
  945. Useful for users to update existing data.
  946. """
  947. def get(self, request):
  948. from .models import Product, ProductAttributeValue
  949. # Create workbook
  950. wb = Workbook()
  951. if 'Sheet' in wb.sheetnames:
  952. wb.remove(wb['Sheet'])
  953. # ===== Products Sheet =====
  954. ws_products = wb.create_sheet("Products", 0)
  955. # Headers
  956. products_headers = [
  957. 'ITEM ID', 'PRODUCT NAME', 'PRODUCT TYPE',
  958. 'Product Short Description', 'Product Long Description', 'image_path'
  959. ]
  960. header_fill = PatternFill(start_color="366092", end_color="366092", fill_type="solid")
  961. header_font = Font(bold=True, color="FFFFFF")
  962. for col_num, header in enumerate(products_headers, 1):
  963. cell = ws_products.cell(row=1, column=col_num)
  964. cell.value = header
  965. cell.fill = header_fill
  966. cell.font = header_font
  967. cell.alignment = Alignment(horizontal="center", vertical="center")
  968. # Fetch and add product data
  969. products = Product.objects.all()
  970. for row_num, product in enumerate(products, 2):
  971. ws_products.cell(row=row_num, column=1, value=product.item_id)
  972. ws_products.cell(row=row_num, column=2, value=product.product_name)
  973. ws_products.cell(row=row_num, column=3, value=product.product_type)
  974. ws_products.cell(row=row_num, column=4, value=product.product_short_description)
  975. ws_products.cell(row=row_num, column=5, value=product.product_long_description)
  976. ws_products.cell(row=row_num, column=6, value=product.image_path)
  977. # Adjust widths
  978. ws_products.column_dimensions['A'].width = 15
  979. ws_products.column_dimensions['B'].width = 25
  980. ws_products.column_dimensions['C'].width = 15
  981. ws_products.column_dimensions['D'].width = 35
  982. ws_products.column_dimensions['E'].width = 50
  983. ws_products.column_dimensions['F'].width = 45
  984. # ===== Attribute_values Sheet =====
  985. ws_attributes = wb.create_sheet("Attribute_values", 1)
  986. attributes_headers = ['item_id', 'attribute_name', 'original_value']
  987. for col_num, header in enumerate(attributes_headers, 1):
  988. cell = ws_attributes.cell(row=1, column=col_num)
  989. cell.value = header
  990. cell.fill = header_fill
  991. cell.font = header_font
  992. cell.alignment = Alignment(horizontal="center", vertical="center")
  993. # Fetch and add attribute values
  994. attributes = ProductAttributeValue.objects.select_related('product').all()
  995. for row_num, attr in enumerate(attributes, 2):
  996. ws_attributes.cell(row=row_num, column=1, value=attr.product.item_id)
  997. ws_attributes.cell(row=row_num, column=2, value=attr.attribute_name)
  998. ws_attributes.cell(row=row_num, column=3, value=attr.original_value)
  999. ws_attributes.column_dimensions['A'].width = 15
  1000. ws_attributes.column_dimensions['B'].width = 25
  1001. ws_attributes.column_dimensions['C'].width = 30
  1002. # Save to BytesIO
  1003. output = io.BytesIO()
  1004. wb.save(output)
  1005. output.seek(0)
  1006. response = HttpResponse(
  1007. output.getvalue(),
  1008. content_type='application/vnd.openxmlformats-officedocument.spreadsheetml.sheet'
  1009. )
  1010. response['Content-Disposition'] = 'attachment; filename=products_export.xlsx'
  1011. return response
  1012. class ProductAttributesUploadView(APIView):
  1013. """
  1014. POST API to upload an Excel file and add mandatory/additional attributes
  1015. for product types with possible values.
  1016. """
  1017. parser_classes = (MultiPartParser, FormParser)
  1018. def post(self, request):
  1019. file_obj = request.FILES.get('file')
  1020. if not file_obj:
  1021. return Response({"error": "No file provided."}, status=status.HTTP_400_BAD_REQUEST)
  1022. try:
  1023. df = pd.read_excel(file_obj)
  1024. required_columns = {'product_type', 'attribute_name', 'is_mandatory', 'possible_values'}
  1025. if not required_columns.issubset(df.columns):
  1026. return Response({
  1027. "error": f"Missing required columns. Found: {list(df.columns)}"
  1028. }, status=status.HTTP_400_BAD_REQUEST)
  1029. for _, row in df.iterrows():
  1030. product_type_name = str(row['product_type']).strip()
  1031. attr_name = str(row['attribute_name']).strip()
  1032. is_mandatory = str(row['is_mandatory']).strip().lower() in ['yes', 'true', '1']
  1033. possible_values = str(row.get('possible_values', '')).strip()
  1034. # Get or create product type
  1035. product_type, _ = ProductType.objects.get_or_create(name=product_type_name)
  1036. # Get or create attribute
  1037. attribute, _ = ProductAttribute.objects.get_or_create(
  1038. product_type=product_type,
  1039. name=attr_name,
  1040. defaults={'is_mandatory': is_mandatory}
  1041. )
  1042. attribute.is_mandatory = is_mandatory
  1043. attribute.save()
  1044. # Handle possible values
  1045. AttributePossibleValue.objects.filter(attribute=attribute).delete()
  1046. if possible_values:
  1047. for val in [v.strip() for v in possible_values.split(',') if v.strip()]:
  1048. AttributePossibleValue.objects.create(attribute=attribute, value=val)
  1049. return Response({"message": "Attributes uploaded successfully."}, status=status.HTTP_201_CREATED)
  1050. except Exception as e:
  1051. return Response({"error": str(e)}, status=status.HTTP_500_INTERNAL_SERVER_ERROR)
  1052. class ProductTypeAttributesView(APIView):
  1053. """
  1054. API to view, create, update, and delete product type attributes and their possible values.
  1055. Also supports dynamic product type creation.
  1056. """
  1057. def get(self, request):
  1058. """
  1059. Retrieve all product types with their attributes and possible values.
  1060. """
  1061. product_types = ProductType.objects.all()
  1062. serializer = ProductTypeSerializer(product_types, many=True)
  1063. # Transform the serialized data into the requested format
  1064. result = []
  1065. for pt in serializer.data:
  1066. for attr in pt['attributes']:
  1067. result.append({
  1068. 'product_type': pt['name'],
  1069. 'attribute_name': attr['name'],
  1070. 'is_mandatory': 'Yes' if attr['is_mandatory'] else 'No',
  1071. 'possible_values': ', '.join([pv['value'] for pv in attr['possible_values']])
  1072. })
  1073. return Response(result, status=status.HTTP_200_OK)
  1074. def post(self, request):
  1075. """
  1076. Create a new product type or attribute with possible values.
  1077. Expected payload example:
  1078. {
  1079. "product_type": "Hardware Screws",
  1080. "attribute_name": "Material",
  1081. "is_mandatory": "Yes",
  1082. "possible_values": "Steel, Zinc Plated, Stainless Steel"
  1083. }
  1084. """
  1085. try:
  1086. product_type_name = request.data.get('product_type')
  1087. attribute_name = request.data.get('attribute_name', '')
  1088. is_mandatory = request.data.get('is_mandatory', '').lower() in ['yes', 'true', '1']
  1089. possible_values = request.data.get('possible_values', '')
  1090. if not product_type_name:
  1091. return Response({
  1092. "error": "product_type is required"
  1093. }, status=status.HTTP_400_BAD_REQUEST)
  1094. with transaction.atomic():
  1095. # Get or create product type
  1096. product_type, created = ProductType.objects.get_or_create(name=product_type_name)
  1097. if created and not attribute_name:
  1098. return Response({
  1099. "message": f"Product type '{product_type_name}' created successfully",
  1100. "data": {"product_type": product_type_name}
  1101. }, status=status.HTTP_201_CREATED)
  1102. if attribute_name:
  1103. # Create attribute
  1104. attribute, attr_created = ProductAttribute.objects.get_or_create(
  1105. product_type=product_type,
  1106. name=attribute_name,
  1107. defaults={'is_mandatory': is_mandatory}
  1108. )
  1109. if not attr_created:
  1110. return Response({
  1111. "error": f"Attribute '{attribute_name}' already exists for product type '{product_type_name}'"
  1112. }, status=status.HTTP_400_BAD_REQUEST)
  1113. # Handle possible values
  1114. if possible_values:
  1115. for val in [v.strip() for v in possible_values.split(',') if v.strip()]:
  1116. AttributePossibleValue.objects.create(attribute=attribute, value=val)
  1117. return Response({
  1118. "message": "Attribute created successfully",
  1119. "data": {
  1120. "product_type": product_type_name,
  1121. "attribute_name": attribute_name,
  1122. "is_mandatory": "Yes" if is_mandatory else "No",
  1123. "possible_values": possible_values
  1124. }
  1125. }, status=status.HTTP_201_CREATED)
  1126. return Response({
  1127. "message": f"Product type '{product_type_name}' already exists",
  1128. "data": {"product_type": product_type_name}
  1129. }, status=status.HTTP_200_OK)
  1130. except Exception as e:
  1131. return Response({"error": str(e)}, status=status.HTTP_500_INTERNAL_SERVER_ERROR)
  1132. def put(self, request):
  1133. """
  1134. Update an existing product type attribute and its possible values.
  1135. Expected payload example:
  1136. {
  1137. "product_type": "Hardware Screws",
  1138. "attribute_name": "Material",
  1139. "is_mandatory": "Yes",
  1140. "possible_values": "Steel, Zinc Plated, Stainless Steel, Brass"
  1141. }
  1142. """
  1143. try:
  1144. product_type_name = request.data.get('product_type')
  1145. attribute_name = request.data.get('attribute_name')
  1146. is_mandatory = request.data.get('is_mandatory', '').lower() in ['yes', 'true', '1']
  1147. possible_values = request.data.get('possible_values', '')
  1148. if not all([product_type_name, attribute_name]):
  1149. return Response({
  1150. "error": "product_type and attribute_name are required"
  1151. }, status=status.HTTP_400_BAD_REQUEST)
  1152. with transaction.atomic():
  1153. try:
  1154. product_type = ProductType.objects.get(name=product_type_name)
  1155. attribute = ProductAttribute.objects.get(
  1156. product_type=product_type,
  1157. name=attribute_name
  1158. )
  1159. except ProductType.DoesNotExist:
  1160. return Response({
  1161. "error": f"Product type '{product_type_name}' not found"
  1162. }, status=status.HTTP_404_NOT_FOUND)
  1163. except ProductAttribute.DoesNotExist:
  1164. return Response({
  1165. "error": f"Attribute '{attribute_name}' not found for product type '{product_type_name}'"
  1166. }, status=status.HTTP_404_NOT_FOUND)
  1167. # Update attribute
  1168. attribute.is_mandatory = is_mandatory
  1169. attribute.save()
  1170. # Update possible values
  1171. AttributePossibleValue.objects.filter(attribute=attribute).delete()
  1172. if possible_values:
  1173. for val in [v.strip() for v in possible_values.split(',') if v.strip()]:
  1174. AttributePossibleValue.objects.create(attribute=attribute, value=val)
  1175. return Response({
  1176. "message": "Attribute updated successfully",
  1177. "data": {
  1178. "product_type": product_type_name,
  1179. "attribute_name": attribute_name,
  1180. "is_mandatory": "Yes" if is_mandatory else "No",
  1181. "possible_values": possible_values
  1182. }
  1183. }, status=status.HTTP_200_OK)
  1184. except Exception as e:
  1185. return Response({"error": str(e)}, status=status.HTTP_500_INTERNAL_SERVER_ERROR)
  1186. def delete(self, request):
  1187. """
  1188. Delete a product type or a specific attribute.
  1189. Expected payload example:
  1190. {
  1191. "product_type": "Hardware Screws",
  1192. "attribute_name": "Material"
  1193. }
  1194. """
  1195. try:
  1196. product_type_name = request.data.get('product_type')
  1197. attribute_name = request.data.get('attribute_name', '')
  1198. if not product_type_name:
  1199. return Response({
  1200. "error": "product_type is required"
  1201. }, status=status.HTTP_400_BAD_REQUEST)
  1202. with transaction.atomic():
  1203. try:
  1204. product_type = ProductType.objects.get(name=product_type_name)
  1205. except ProductType.DoesNotExist:
  1206. return Response({
  1207. "error": f"Product type '{product_type_name}' not found"
  1208. }, status=status.HTTP_404_NOT_FOUND)
  1209. if attribute_name:
  1210. # Delete specific attribute
  1211. try:
  1212. attribute = ProductAttribute.objects.get(
  1213. product_type=product_type,
  1214. name=attribute_name
  1215. )
  1216. attribute.delete()
  1217. return Response({
  1218. "message": f"Attribute '{attribute_name}' deleted successfully from product type '{product_type_name}'"
  1219. }, status=status.HTTP_200_OK)
  1220. except ProductAttribute.DoesNotExist:
  1221. return Response({
  1222. "error": f"Attribute '{attribute_name}' not found for product type '{product_type_name}'"
  1223. }, status=status.HTTP_404_NOT_FOUND)
  1224. else:
  1225. # Delete entire product type
  1226. product_type.delete()
  1227. return Response({
  1228. "message": f"Product type '{product_type_name}' and all its attributes deleted successfully"
  1229. }, status=status.HTTP_200_OK)
  1230. except Exception as e:
  1231. return Response({"error": str(e)}, status=status.HTTP_500_INTERNAL_SERVER_ERROR)
  1232. class ProductTypeListView(APIView):
  1233. """
  1234. GET API to list all product types (only names).
  1235. """
  1236. def get(self, request):
  1237. product_types = ProductType.objects.values_list('name', flat=True)
  1238. return Response({"product_types": list(product_types)}, status=status.HTTP_200_OK)
  1239. class ProductAttributeValueView(APIView):
  1240. """
  1241. API to manage manually entered original attribute values.
  1242. GET: Retrieve all attribute values for a product
  1243. POST: Create or update attribute values for a product
  1244. DELETE: Delete attribute values
  1245. """
  1246. def get(self, request):
  1247. """
  1248. Get original attribute values for a specific product or all products.
  1249. Query params: item_id (optional)
  1250. """
  1251. item_id = request.query_params.get('item_id')
  1252. if item_id:
  1253. try:
  1254. product = Product.objects.get(item_id=item_id)
  1255. values = ProductAttributeValue.objects.filter(product=product)
  1256. serializer = ProductAttributeValueSerializer(values, many=True)
  1257. return Response({
  1258. "item_id": item_id,
  1259. "attributes": serializer.data
  1260. }, status=status.HTTP_200_OK)
  1261. except Product.DoesNotExist:
  1262. return Response({
  1263. "error": f"Product with item_id '{item_id}' not found"
  1264. }, status=status.HTTP_404_NOT_FOUND)
  1265. else:
  1266. # Return all attribute values grouped by product
  1267. values = ProductAttributeValue.objects.all().select_related('product')
  1268. serializer = ProductAttributeValueSerializer(values, many=True)
  1269. return Response(serializer.data, status=status.HTTP_200_OK)
  1270. def post(self, request):
  1271. """
  1272. Create or update original attribute value for a product.
  1273. Expected payload:
  1274. {
  1275. "item_id": "3217373735",
  1276. "attribute_name": "Clothing Neck Style",
  1277. "original_value": "V-Neck Square"
  1278. }
  1279. """
  1280. serializer = ProductAttributeValueInputSerializer(data=request.data)
  1281. if not serializer.is_valid():
  1282. return Response({"error": serializer.errors}, status=status.HTTP_400_BAD_REQUEST)
  1283. validated_data = serializer.validated_data
  1284. item_id = validated_data['item_id']
  1285. attribute_name = validated_data['attribute_name']
  1286. original_value = validated_data['original_value']
  1287. try:
  1288. product = Product.objects.get(item_id=item_id)
  1289. except Product.DoesNotExist:
  1290. return Response({
  1291. "error": f"Product with item_id '{item_id}' not found"
  1292. }, status=status.HTTP_404_NOT_FOUND)
  1293. # Create or update the attribute value
  1294. attr_value, created = ProductAttributeValue.objects.update_or_create(
  1295. product=product,
  1296. attribute_name=attribute_name,
  1297. defaults={'original_value': original_value}
  1298. )
  1299. response_serializer = ProductAttributeValueSerializer(attr_value)
  1300. return Response({
  1301. "message": "Attribute value created" if created else "Attribute value updated",
  1302. "data": response_serializer.data
  1303. }, status=status.HTTP_201_CREATED if created else status.HTTP_200_OK)
  1304. def delete(self, request):
  1305. """
  1306. Delete original attribute value(s).
  1307. Expected payload:
  1308. {
  1309. "item_id": "3217373735",
  1310. "attribute_name": "Clothing Neck Style" # Optional, if not provided deletes all for product
  1311. }
  1312. """
  1313. item_id = request.data.get('item_id')
  1314. attribute_name = request.data.get('attribute_name')
  1315. if not item_id:
  1316. return Response({
  1317. "error": "item_id is required"
  1318. }, status=status.HTTP_400_BAD_REQUEST)
  1319. try:
  1320. product = Product.objects.get(item_id=item_id)
  1321. except Product.DoesNotExist:
  1322. return Response({
  1323. "error": f"Product with item_id '{item_id}' not found"
  1324. }, status=status.HTTP_404_NOT_FOUND)
  1325. if attribute_name:
  1326. # Delete specific attribute
  1327. deleted_count, _ = ProductAttributeValue.objects.filter(
  1328. product=product,
  1329. attribute_name=attribute_name
  1330. ).delete()
  1331. if deleted_count == 0:
  1332. return Response({
  1333. "error": f"Attribute '{attribute_name}' not found for product '{item_id}'"
  1334. }, status=status.HTTP_404_NOT_FOUND)
  1335. return Response({
  1336. "message": f"Attribute '{attribute_name}' deleted successfully"
  1337. }, status=status.HTTP_200_OK)
  1338. else:
  1339. # Delete all attributes for product
  1340. deleted_count, _ = ProductAttributeValue.objects.filter(product=product).delete()
  1341. return Response({
  1342. "message": f"Deleted {deleted_count} attribute(s) for product '{item_id}'"
  1343. }, status=status.HTTP_200_OK)
  1344. class BulkProductAttributeValueView(APIView):
  1345. """
  1346. API for bulk operations on original attribute values.
  1347. POST: Create/update multiple attribute values at once
  1348. """
  1349. def post(self, request):
  1350. """
  1351. Bulk create or update attribute values for multiple products.
  1352. Expected payload:
  1353. {
  1354. "products": [
  1355. {
  1356. "item_id": "3217373735",
  1357. "attributes": {
  1358. "Clothing Neck Style": "V-Neck Square",
  1359. "Condition": "New with tags"
  1360. }
  1361. },
  1362. {
  1363. "item_id": "1234567890",
  1364. "attributes": {
  1365. "Material": "Cotton",
  1366. "Size": "L"
  1367. }
  1368. }
  1369. ]
  1370. }
  1371. """
  1372. products_data = request.data.get('products', [])
  1373. if not products_data:
  1374. return Response({
  1375. "error": "products list is required"
  1376. }, status=status.HTTP_400_BAD_REQUEST)
  1377. results = []
  1378. successful = 0
  1379. failed = 0
  1380. with transaction.atomic():
  1381. for product_data in products_data:
  1382. serializer = BulkProductAttributeValueSerializer(data=product_data)
  1383. if not serializer.is_valid():
  1384. failed += 1
  1385. results.append({
  1386. "item_id": product_data.get('item_id'),
  1387. "status": "failed",
  1388. "error": serializer.errors
  1389. })
  1390. continue
  1391. validated_data = serializer.validated_data
  1392. item_id = validated_data['item_id']
  1393. attributes = validated_data['attributes']
  1394. try:
  1395. product = Product.objects.get(item_id=item_id)
  1396. created_count = 0
  1397. updated_count = 0
  1398. for attr_name, original_value in attributes.items():
  1399. _, created = ProductAttributeValue.objects.update_or_create(
  1400. product=product,
  1401. attribute_name=attr_name,
  1402. defaults={'original_value': original_value}
  1403. )
  1404. if created:
  1405. created_count += 1
  1406. else:
  1407. updated_count += 1
  1408. successful += 1
  1409. results.append({
  1410. "item_id": item_id,
  1411. "status": "success",
  1412. "created": created_count,
  1413. "updated": updated_count
  1414. })
  1415. except Product.DoesNotExist:
  1416. failed += 1
  1417. results.append({
  1418. "item_id": item_id,
  1419. "status": "failed",
  1420. "error": f"Product not found"
  1421. })
  1422. return Response({
  1423. "results": results,
  1424. "total_products": len(products_data),
  1425. "successful": successful,
  1426. "failed": failed
  1427. }, status=status.HTTP_200_OK)
  1428. class ProductListWithAttributesView(APIView):
  1429. """
  1430. GET API to list all products with their original attribute values.
  1431. """
  1432. def get(self, request):
  1433. item_id = request.query_params.get('item_id')
  1434. if item_id:
  1435. try:
  1436. product = Product.objects.get(item_id=item_id)
  1437. serializer = ProductWithAttributesSerializer(product)
  1438. return Response(serializer.data, status=status.HTTP_200_OK)
  1439. except Product.DoesNotExist:
  1440. return Response({
  1441. "error": f"Product with item_id '{item_id}' not found"
  1442. }, status=status.HTTP_404_NOT_FOUND)
  1443. else:
  1444. products = Product.objects.all()
  1445. serializer = ProductWithAttributesSerializer(products, many=True)
  1446. return Response(serializer.data, status=status.HTTP_200_OK)