# views.py

# ==================== Cleaned & Optimized Imports ====================
import os
import io
import json
import logging
import threading
import time
from datetime import datetime
import pandas as pd
import concurrent.futures
from django.conf import settings
from django.http import HttpResponse
from django.db import transaction
from django.db.models import Prefetch
from rest_framework.views import APIView
from rest_framework.response import Response
from rest_framework import status
from rest_framework.parsers import MultiPartParser, FormParser
from openpyxl import Workbook
from openpyxl.styles import Font, PatternFill, Alignment
from . import cache_config
# --- Local imports ---
from .models import (
    Product,
    ProductType,
    ProductAttribute,
    ProductAttributeValue,
    AttributePossibleValue,
)
from .serializers import (
    ProductSerializer,
    ProductTypeSerializer,
    ProductAttributeSerializer,
    AttributePossibleValueSerializer,
    SingleProductRequestSerializer,
    BatchProductRequestSerializer,
    ProductAttributeResultSerializer,
    BatchProductResponseSerializer,
    ProductAttributeValueSerializer,
    ProductAttributeValueInputSerializer,
    BulkProductAttributeValueSerializer,
    ProductWithAttributesSerializer,
)
from .services import ProductAttributeService
from .ocr_service import OCRService
from .visual_processing_service import VisualProcessingService
# --- Configuration for Generated Outputs Folder ---
OUTPUT_FOLDER_NAME = 'generated_outputs'
OUTPUT_ROOT = os.path.join(settings.MEDIA_ROOT, OUTPUT_FOLDER_NAME)
OUTPUT_URL = os.path.join(settings.MEDIA_URL, OUTPUT_FOLDER_NAME).replace('\\', '/')  # Ensure forward slashes
LOG_FILE_NAME = 'excel_generation.log'
STATUS_FILE_NAME = 'excel_generation_status.json'
EXCEL_FILE_NAME = 'generated_products.xlsx'
LOG_FILE_PATH = os.path.join(OUTPUT_ROOT, LOG_FILE_NAME)
STATUS_FILE_PATH = os.path.join(OUTPUT_ROOT, STATUS_FILE_NAME)
EXCEL_FILE_PATH = os.path.join(OUTPUT_ROOT, EXCEL_FILE_NAME)

# Ensure the output folder exists
os.makedirs(OUTPUT_ROOT, exist_ok=True)

# Configure logging
logging.basicConfig(
    filename=LOG_FILE_PATH,
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s'
)
logger = logging.getLogger(__name__)
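
# Illustrative layout of the generated artifacts, assuming MEDIA_ROOT='/srv/app/media'
# and MEDIA_URL='/media/' (example values only, not taken from this project's settings):
#   OUTPUT_ROOT      -> /srv/app/media/generated_outputs
#   LOG_FILE_PATH    -> /srv/app/media/generated_outputs/excel_generation.log
#   STATUS_FILE_PATH -> /srv/app/media/generated_outputs/excel_generation_status.json
#   EXCEL_FILE_PATH  -> /srv/app/media/generated_outputs/generated_products.xlsx
#   Excel URL        -> /media/generated_outputs/generated_products.xlsx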
class ExtractProductAttributesView(APIView):
    """
    API endpoint to extract product attributes for a single product by item_id.
    Fetches product details from the database with source tracking.
    Returns attributes in array format: [{"value": "...", "source": "..."}]
    Includes OCR and Visual Processing results.
    """
    def post(self, request):
        serializer = SingleProductRequestSerializer(data=request.data)
        if not serializer.is_valid():
            return Response({"error": serializer.errors}, status=status.HTTP_400_BAD_REQUEST)
        validated_data = serializer.validated_data
        item_id = validated_data.get("item_id")

        # Fetch product from DB
        try:
            product = Product.objects.get(item_id=item_id)
        except Product.DoesNotExist:
            return Response(
                {"error": f"Product with item_id '{item_id}' not found."},
                status=status.HTTP_404_NOT_FOUND
            )

        # Extract product details
        title = product.product_name
        short_desc = product.product_short_description
        long_desc = product.product_long_description
        image_url = product.image_path

        # Process image for OCR if required
        ocr_results = None
        ocr_text = None
        visual_results = None
        if validated_data.get("process_image", True) and image_url:
            # OCR Processing
            ocr_service = OCRService()
            ocr_results = ocr_service.process_image(image_url)
            if ocr_results and ocr_results.get("detected_text"):
                ocr_attrs = ProductAttributeService.extract_attributes_from_ocr(
                    ocr_results, validated_data.get("model")
                )
                ocr_results["extracted_attributes"] = ocr_attrs
                ocr_text = "\n".join([
                    f"{item['text']} (confidence: {item['confidence']:.2f})"
                    for item in ocr_results["detected_text"]
                ])
            # Visual Processing
            visual_service = VisualProcessingService()
            product_type_hint = product.product_type if hasattr(product, 'product_type') else None
            visual_results = visual_service.process_image(image_url, product_type_hint)

        # Combine all product text with source tracking
        product_text, source_map = ProductAttributeService.combine_product_text(
            title=title,
            short_desc=short_desc,
            long_desc=long_desc,
            ocr_text=ocr_text
        )

        # Extract attributes with enhanced features and source tracking
        result = ProductAttributeService.extract_attributes(
            product_text=product_text,
            mandatory_attrs=validated_data["mandatory_attrs"],
            source_map=source_map,
            model=validated_data.get("model"),
            extract_additional=validated_data.get("extract_additional", True),
            multiple=validated_data.get("multiple", []),
            threshold_abs=validated_data.get("threshold_abs", 0.65),
            margin=validated_data.get("margin", 0.15),
            use_dynamic_thresholds=validated_data.get("use_dynamic_thresholds", True),
            use_adaptive_margin=validated_data.get("use_adaptive_margin", True),
            use_semantic_clustering=validated_data.get("use_semantic_clustering", True)
        )

        # Attach OCR results if available
        if ocr_results:
            result["ocr_results"] = ocr_results
        # Attach Visual Processing results if available
        if visual_results:
            result["visual_results"] = visual_results

        response_serializer = ProductAttributeResultSerializer(data=result)
        if response_serializer.is_valid():
            return Response(response_serializer.data, status=status.HTTP_200_OK)
        return Response(result, status=status.HTTP_200_OK)
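
# Illustrative request body for the endpoint above. The URL route lives in urls.py
# and is not shown here, so the path is hypothetical; the field names follow the
# validated_data keys used in post(), and the mandatory_attrs shape follows the
# {"Attribute Name": ["Value 1", "Value 2"]} format built further down in this file:
# POST /api/extract-attributes/   (hypothetical route)
# {
#     "item_id": "SKU-001",
#     "process_image": true,
#     "model": "llama-3.1-8b-instant",
#     "mandatory_attrs": {"Color": ["Red", "Blue"], "Material": ["Cotton", "Wool"]},
#     "extract_additional": true
# }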
# ==================== OPTIMIZED BATCH VIEW ====================
class BatchExtractProductAttributesView(APIView):
    """
    ⚡ PERFORMANCE OPTIMIZED: Batch extraction with intelligent parallelization.
    Expected performance: 10 products in 30-60 seconds (with image processing).
    Now with user-value reasoning.
    """
    def post(self, request):
        start_time = time.time()
        serializer = BatchProductRequestSerializer(data=request.data)
        if not serializer.is_valid():
            return Response({"error": serializer.errors}, status=status.HTTP_400_BAD_REQUEST)
        validated_data = serializer.validated_data
        product_list = validated_data.get("products", [])
        logger.info(f"🚀 Starting batch processing for {len(product_list)} products")

        # ==================== OPTIMIZATION 1: Bulk DB Query ====================
        item_ids = [p['item_id'] for p in product_list]
        products_queryset = Product.objects.filter(
            item_id__in=item_ids
        ).prefetch_related('attribute_values')
        product_map = {product.item_id: product for product in products_queryset}

        # Prefetch ALL original attribute values in ONE query
        original_values_qs = ProductAttributeValue.objects.filter(
            product__item_id__in=item_ids
        ).select_related('product')
        original_values_map = {}
        for attr_val in original_values_qs:
            item_id = attr_val.product.item_id
            if item_id not in original_values_map:
                original_values_map[item_id] = {}
            original_values_map[item_id][attr_val.attribute_name] = attr_val.original_value
        logger.info(f"✓ Loaded {len(product_map)} products from database")
        logger.info(f"✓ Loaded user values for {len(original_values_map)} products")

        # Extract settings
        model = validated_data.get("model")
        extract_additional = validated_data.get("extract_additional", True)
        process_image = validated_data.get("process_image", True)
        multiple = validated_data.get("multiple", [])
        threshold_abs = validated_data.get("threshold_abs", 0.65)
        margin = validated_data.get("margin", 0.15)
        use_dynamic_thresholds = validated_data.get("use_dynamic_thresholds", False)
        use_adaptive_margin = validated_data.get("use_adaptive_margin", False)
        use_semantic_clustering = validated_data.get("use_semantic_clustering", False)

        results = []
        successful = 0
        failed = 0

        # ==================== OPTIMIZATION 2: Conditional Service Init ====================
        # Only initialize the image services when image processing is requested.
        ocr_service = None
        visual_service = None
        if process_image:
            from .ocr_service import OCRService
            from .visual_processing_service import VisualProcessingService
            ocr_service = OCRService()
            visual_service = VisualProcessingService()
            logger.info("✓ Image processing services initialized")

        # ==================== OPTIMIZATION 3: Smart Parallelization ====================
        def process_single_product(product_entry):
            """Process a single product (runs in a worker thread)."""
            product_start = time.time()
            item_id = product_entry['item_id']
            mandatory_attrs = product_entry['mandatory_attrs']
            if item_id not in product_map:
                return {
                    "product_id": item_id,
                    "error": "Product not found in database"
                }, False
            product = product_map[item_id]
            try:
                title = product.product_name
                short_desc = product.product_short_description
                long_desc = product.product_long_description
                image_url = product.image_path
                ocr_results = None
                ocr_text = None
                visual_results = None

                # ⚡ SKIP IMAGE PROCESSING IF DISABLED
                if process_image and image_url:
                    if ocr_service:
                        ocr_results = ocr_service.process_image(image_url)
                        if ocr_results and ocr_results.get("detected_text"):
                            ocr_attrs = ProductAttributeService.extract_attributes_from_ocr(
                                ocr_results, model
                            )
                            ocr_results["extracted_attributes"] = ocr_attrs
                            ocr_text = "\n".join([
                                f"{item['text']} (confidence: {item['confidence']:.2f})"
                                for item in ocr_results["detected_text"]
                            ])
                    if visual_service:
                        product_type_hint = product.product_type if hasattr(product, 'product_type') else None
                        visual_results = visual_service.process_image(image_url, product_type_hint)
                        if visual_results and visual_results.get('visual_attributes'):
                            visual_results['visual_attributes'] = ProductAttributeService.format_visual_attributes(
                                visual_results['visual_attributes']
                            )

                # Combine product text with source tracking
                product_text, source_map = ProductAttributeService.combine_product_text(
                    title=title,
                    short_desc=short_desc,
                    long_desc=long_desc,
                    ocr_text=ocr_text
                )

                # 🆕 GET USER-ENTERED VALUES FOR THIS PRODUCT
                user_entered_values = original_values_map.get(item_id, {})
                logger.info(f"Processing {item_id} with {len(user_entered_values)} user-entered values")

                # ⚡ EXTRACT ATTRIBUTES WITH USER VALUES AND REASONING
                extracted = ProductAttributeService.extract_attributes(
                    product_text=product_text,
                    mandatory_attrs=mandatory_attrs,
                    source_map=source_map,
                    model=model,
                    extract_additional=extract_additional,
                    multiple=multiple,
                    use_cache=True,
                    user_entered_values=""  # 🆕 User-value reasoning disabled for now: pass an empty value
                    # user_entered_values=user_entered_values  # 🆕 Pass the real user values to enable reasoning
                )
                # NOTE: Original values are now part of the LLM response with reasoning,
                # so they do not need to be merged in separately; they already appear
                # in the "user_value" field.
                result = {
                    "product_id": product.item_id,
                    "mandatory": extracted.get("mandatory", {}),
                    "additional": extracted.get("additional", {}),
                }
                if ocr_results:
                    result["ocr_results"] = ocr_results
                if visual_results:
                    result["visual_results"] = visual_results
                processing_time = time.time() - product_start
                logger.info(f"✓ Processed {item_id} in {processing_time:.2f}s")
                return result, True
            except Exception as e:
                logger.error(f"❌ Error processing {item_id}: {str(e)}")
                return {
                    "product_id": item_id,
                    "error": str(e)
                }, False

        # ==================== OPTIMIZATION 4: Parallel Execution ====================
        # Currently capped at a single worker, so products are processed sequentially;
        # raise the cap to re-enable parallelism.
        max_workers = min(1, len(product_list))
        logger.info(f"⚡ Using {max_workers} parallel workers")
        with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
            future_to_product = {
                executor.submit(process_single_product, product): product
                for product in product_list
            }
            for future in concurrent.futures.as_completed(future_to_product):
                try:
                    result, success = future.result()
                    results.append(result)
                    if success:
                        successful += 1
                    else:
                        failed += 1
                except Exception as e:
                    failed += 1
                    logger.error(f"❌ Future execution error: {str(e)}")
                    results.append({
                        "product_id": "unknown",
                        "error": str(e)
                    })

        total_time = time.time() - start_time
        # Get cache statistics
        cache_stats = ProductAttributeService.get_cache_stats()
        logger.info(f"""
🎉 BATCH PROCESSING COMPLETE
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
Total products: {len(product_list)}
Successful: {successful}
Failed: {failed}
Total time: {total_time:.2f}s
Avg time/product: {total_time/len(product_list):.2f}s
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
""")
        batch_result = {
            "results": results,
            "total_products": len(product_list),
            "successful": successful,
            "failed": failed,
            "performance": {
                "total_time_seconds": round(total_time, 2),
                "avg_time_per_product": round(total_time / len(product_list), 2),
                "workers_used": max_workers
            },
            "cache_stats": cache_stats
        }
        response_serializer = BatchProductResponseSerializer(data=batch_result)
        if response_serializer.is_valid():
            return Response(response_serializer.data, status=status.HTTP_200_OK)
        return Response(batch_result, status=status.HTTP_200_OK)
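
# Illustrative request body for the batch endpoint above. The route is defined in
# urls.py and not shown here, so the path is hypothetical; the "products" list shape
# follows how product_list entries are consumed in post():
# POST /api/extract-attributes/batch/   (hypothetical route)
# {
#     "products": [
#         {"item_id": "SKU-001", "mandatory_attrs": {"Color": ["Red", "Blue"]}},
#         {"item_id": "SKU-002", "mandatory_attrs": {"Material": ["Cotton", "Wool"]}}
#     ],
#     "process_image": false,
#     "extract_additional": true
# }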
# ==================== views.py (OPTIMIZED FOR 4 API KEYS) ====================
# import concurrent.futures
# import logging
# import time
# from rest_framework.views import APIView
# from rest_framework.response import Response
# from rest_framework import status
# from .models import Product, ProductAttributeValue
# from .serializers import BatchProductRequestSerializer, BatchProductResponseSerializer
# from .services import ProductAttributeService
# logger = logging.getLogger(__name__)
# class BatchExtractProductAttributesView(APIView):
#     """
#     ⚡ PERFORMANCE OPTIMIZED: Batch extraction with 4-key load balancer
#     Expected performance with 4 keys: 10 products in 20-35 seconds (with image processing)
#     NOW WITH USER VALUE REASONING + MULTI-KEY SUPPORT
#     """
#     def post(self, request):
#         start_time = time.time()
#         serializer = BatchProductRequestSerializer(data=request.data)
#         if not serializer.is_valid():
#             return Response({"error": serializer.errors}, status=status.HTTP_400_BAD_REQUEST)
#         validated_data = serializer.validated_data
#         product_list = validated_data.get("products", [])
#         logger.info(f"🚀 Starting batch processing for {len(product_list)} products")
#
#         # ==================== OPTIMIZATION 1: Bulk DB Query ====================
#         item_ids = [p['item_id'] for p in product_list]
#         products_queryset = Product.objects.filter(
#             item_id__in=item_ids
#         ).prefetch_related('attribute_values')
#         product_map = {product.item_id: product for product in products_queryset}
#         # Prefetch ALL original attribute values in ONE query
#         original_values_qs = ProductAttributeValue.objects.filter(
#             product__item_id__in=item_ids
#         ).select_related('product')
#         original_values_map = {}
#         for attr_val in original_values_qs:
#             item_id = attr_val.product.item_id
#             if item_id not in original_values_map:
#                 original_values_map[item_id] = {}
#             original_values_map[item_id][attr_val.attribute_name] = attr_val.original_value
#         logger.info(f"✓ Loaded {len(product_map)} products from database")
#         logger.info(f"✓ Loaded user values for {len(original_values_map)} products")
#
#         # Extract settings
#         model = validated_data.get("model")
#         extract_additional = validated_data.get("extract_additional", True)
#         process_image = validated_data.get("process_image", True)
#         multiple = validated_data.get("multiple", [])
#         threshold_abs = validated_data.get("threshold_abs", 0.65)
#         margin = validated_data.get("margin", 0.15)
#         use_dynamic_thresholds = validated_data.get("use_dynamic_thresholds", False)
#         use_adaptive_margin = validated_data.get("use_adaptive_margin", False)
#         use_semantic_clustering = validated_data.get("use_semantic_clustering", False)
#         results = []
#         successful = 0
#         failed = 0
#
#         # ==================== OPTIMIZATION 2: Conditional Service Init ====================
#         ocr_service = None
#         visual_service = None
#         if process_image:
#             try:
#                 from .ocr_service import OCRService
#                 from .visual_processing_service import VisualProcessingService
#                 ocr_service = OCRService()
#                 visual_service = VisualProcessingService()
#                 logger.info("✓ Image processing services initialized")
#             except ImportError as e:
#                 logger.warning(f"⚠️ Image processing services not available: {e}")
#                 process_image = False
#
#         # ==================== OPTIMIZATION 3: Smart Parallelization with 4 Keys ====================
#         def process_single_product(product_entry):
#             """Process a single product (runs in parallel)"""
#             product_start = time.time()
#             item_id = product_entry['item_id']
#             mandatory_attrs = product_entry['mandatory_attrs']
#             if item_id not in product_map:
#                 return {
#                     "product_id": item_id,
#                     "error": "Product not found in database"
#                 }, False
#             product = product_map[item_id]
#             try:
#                 title = product.product_name
#                 short_desc = product.product_short_description
#                 long_desc = product.product_long_description
#                 image_url = product.image_path
#                 ocr_results = None
#                 ocr_text = None
#                 visual_results = None
#                 # ⚡ IMAGE PROCESSING (if enabled)
#                 if process_image and image_url:
#                     if ocr_service:
#                         try:
#                             ocr_results = ocr_service.process_image(image_url)
#                             if ocr_results and ocr_results.get("detected_text"):
#                                 ocr_attrs = ProductAttributeService.extract_attributes_from_ocr(
#                                     ocr_results, model
#                                 )
#                                 ocr_results["extracted_attributes"] = ocr_attrs
#                                 ocr_text = "\n".join([
#                                     f"{item['text']} (confidence: {item['confidence']:.2f})"
#                                     for item in ocr_results["detected_text"]
#                                 ])
#                         except Exception as e:
#                             logger.warning(f"OCR failed for {item_id}: {e}")
#                     if visual_service:
#                         try:
#                             product_type_hint = product.product_type if hasattr(product, 'product_type') else None
#                             visual_results = visual_service.process_image(image_url, product_type_hint)
#                             if visual_results and visual_results.get('visual_attributes'):
#                                 visual_results['visual_attributes'] = ProductAttributeService.format_visual_attributes(
#                                     visual_results['visual_attributes']
#                                 )
#                         except Exception as e:
#                             logger.warning(f"Visual processing failed for {item_id}: {e}")
#                 # Combine product text with source tracking
#                 product_text, source_map = ProductAttributeService.combine_product_text(
#                     title=title,
#                     short_desc=short_desc,
#                     long_desc=long_desc,
#                     ocr_text=ocr_text
#                 )
#                 # 🆕 GET USER-ENTERED VALUES FOR THIS PRODUCT
#                 user_entered_values = original_values_map.get(item_id, {})
#                 if user_entered_values:
#                     logger.debug(f"Processing {item_id} with {len(user_entered_values)} user-entered values")
#                 # ⚡ EXTRACT ATTRIBUTES WITH MULTI-KEY LOAD BALANCER
#                 extracted = ProductAttributeService.extract_attributes(
#                     product_text=product_text,
#                     mandatory_attrs=mandatory_attrs,
#                     source_map=source_map,
#                     model=model,
#                     extract_additional=extract_additional,
#                     multiple=multiple,
#                     use_cache=True,
#                     user_entered_values=user_entered_values
#                 )
#                 result = {
#                     "product_id": product.item_id,
#                     "mandatory": extracted.get("mandatory", {}),
#                     "additional": extracted.get("additional", {}),
#                 }
#                 if ocr_results:
#                     result["ocr_results"] = ocr_results
#                 if visual_results:
#                     result["visual_results"] = visual_results
#                 processing_time = time.time() - product_start
#                 logger.info(f"✓ Processed {item_id} in {processing_time:.2f}s")
#                 return result, True
#             except Exception as e:
#                 logger.error(f"❌ Error processing {item_id}: {str(e)}", exc_info=True)
#                 return {
#                     "product_id": item_id,
#                     "error": str(e)
#                 }, False
#
#         # ==================== OPTIMIZATION 4: Parallel Execution with 4 Keys ====================
#         # With 4 API keys we can safely run more workers; image processing is I/O bound,
#         # so it gets a smaller worker pool than text-only runs.
#         if process_image:
#             max_workers = min(3, len(product_list))
#         else:
#             max_workers = min(5, len(product_list))
#         logger.info(f"⚡ Using {max_workers} parallel workers with 4-key load balancer")
#         with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
#             future_to_product = {
#                 executor.submit(process_single_product, product): product
#                 for product in product_list
#             }
#             for future in concurrent.futures.as_completed(future_to_product):
#                 try:
#                     result, success = future.result()
#                     results.append(result)
#                     if success:
#                         successful += 1
#                     else:
#                         failed += 1
#                 except Exception as e:
#                     failed += 1
#                     logger.error(f"❌ Future execution error: {str(e)}")
#                     results.append({
#                         "product_id": "unknown",
#                         "error": str(e)
#                     })
#         total_time = time.time() - start_time
#         # Get comprehensive statistics
#         cache_stats = ProductAttributeService.get_cache_stats()
#         logger.info(f"""
# ╔═══════════════════════════════════════════════════════════╗
# ║ 🎉 BATCH PROCESSING COMPLETE                              ║
# ╠═══════════════════════════════════════════════════════════╣
# ║ Total products: {len(product_list):<3}                    ║
# ║ Successful: {successful:<3}                               ║
# ║ Failed: {failed:<3}                                       ║
# ║ Total time: {total_time:.2f}s                             ║
# ║ Avg time/product: {total_time/len(product_list):.2f}s     ║
# ║ Workers used: {max_workers:<2}                            ║
# ║ API keys available: {cache_stats['load_balancer'].get('available_keys', 0)}/{cache_stats['load_balancer'].get('total_keys', 0)} ║
# ╚═══════════════════════════════════════════════════════════╝
# """)
#         batch_result = {
#             "results": results,
#             "total_products": len(product_list),
#             "successful": successful,
#             "failed": failed,
#             "performance": {
#                 "total_time_seconds": round(total_time, 2),
#                 "avg_time_per_product": round(total_time / len(product_list), 2),
#                 "workers_used": max_workers,
#                 "throughput_products_per_second": round(len(product_list) / total_time, 2)
#             },
#             "cache_stats": cache_stats
#         }
#         response_serializer = BatchProductResponseSerializer(data=batch_result)
#         if response_serializer.is_valid():
#             return Response(response_serializer.data, status=status.HTTP_200_OK)
#         return Response(batch_result, status=status.HTTP_200_OK)
class ProductListView(APIView):
    """
    GET API to list all products with details.
    """
    def get(self, request):
        products = Product.objects.all()
        serializer = ProductSerializer(products, many=True)
        return Response(serializer.data, status=status.HTTP_200_OK)

# -------------------------------------------------------------------------------------------------
def generate_product_excel_background():
    """
    Perform batch attribute extraction for all products and generate an Excel file.
    Runs in a background thread to avoid blocking the API response.
    Logs success/failure and saves a status file for external monitoring.
    """
    logger.info(f"[{datetime.now().isoformat()}] Starting background product Excel generation and attribute extraction.")
    successful = 0
    failed = 0
    results = []  # To store detailed extraction results for Excel sheet 2

    # Helper to write the status file (SUCCESS/FAILED)
    def write_status(status_type, error_msg=None):
        status_data = {
            "status": status_type,
            "timestamp": datetime.now().isoformat(),
            "products_processed": successful + failed,
            "products_successful": successful,
            "products_failed": failed,
            # FIX: Use the updated EXCEL_FILE_NAME and OUTPUT_URL
            "excel_path": os.path.join(OUTPUT_URL, EXCEL_FILE_NAME) if status_type == "SUCCESS" else None,
            "log_path": os.path.join(OUTPUT_URL, LOG_FILE_NAME),
            "error_message": error_msg
        }
        try:
            # FIX: STATUS_FILE_PATH is now inside generated_outputs
            with open(STATUS_FILE_PATH, 'w') as f:
                json.dump(status_data, f, indent=4)
        except Exception as e:
            logger.exception(f"CRITICAL ERROR: Failed to write status file at {STATUS_FILE_PATH}: {e}")
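
    # Illustrative contents of the status file on success (all values are examples
    # only; the keys mirror status_data above, and the paths assume MEDIA_URL='/media/'):
    # {
    #     "status": "SUCCESS",
    #     "timestamp": "2024-01-01T12:00:00",
    #     "products_processed": 10,
    #     "products_successful": 9,
    #     "products_failed": 1,
    #     "excel_path": "/media/generated_outputs/generated_products.xlsx",
    #     "log_path": "/media/generated_outputs/excel_generation.log",
    #     "error_message": null
    # }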
  796. try:
  797. # 1. PREFETCH all necessary related data to minimize database queries
  798. # Prefetch possible values for mandatory attributes
  799. possible_values_prefetch = Prefetch(
  800. 'attributes',
  801. queryset=ProductAttribute.objects.filter(is_mandatory=True).prefetch_related('possible_values')
  802. )
  803. # Fetch all ProductTypes with their mandatory attributes and possible values
  804. all_product_types = ProductType.objects.prefetch_related(possible_values_prefetch)
  805. product_type_map = {
  806. pt.name: pt for pt in all_product_types
  807. }
  808. # Prepare product_list for batch extraction
  809. all_products = Product.objects.all()
  810. product_list = []
  811. for p in all_products:
  812. mandatory_attrs_dict = {}
  813. product_type_name = p.product_type.strip() if p.product_type else None
  814. if product_type_name and product_type_name in product_type_map:
  815. pt = product_type_map[product_type_name]
  816. # Build the mandatory_attrs dictionary: { "Attribute Name": ["Value 1", "Value 2"], ... }
  817. for attr in pt.attributes.all():
  818. mandatory_attrs_dict[attr.name] = [
  819. pv.value for pv in attr.possible_values.all()
  820. ]
  821. product_list.append({
  822. "item_id": p.item_id,
  823. "product_type_name": product_type_name,
  824. "mandatory_attrs": mandatory_attrs_dict
  825. })
  826. # Batch settings (using defaults)
  827. model = "llama-3.1-8b-instant"
  828. extract_additional = True
  829. process_image = False
  830. multiple = []
  831. threshold_abs = 0.65
  832. margin = 0.15
  833. use_dynamic_thresholds = True
  834. use_adaptive_margin = True
  835. use_semantic_clustering = True
  836. # Batch extraction logic
  837. item_ids = [p['item_id'] for p in product_list]
  838. products_queryset = Product.objects.filter(item_id__in=item_ids)
  839. product_map = {product.item_id: product for product in products_queryset}
  840. found_ids = set(product_map.keys())
  841. for product_entry in product_list:
  842. item_id = product_entry['item_id']
  843. mandatory_attrs = product_entry['mandatory_attrs']
  844. if item_id not in found_ids:
  845. failed += 1
  846. results.append({
  847. "product_id": item_id,
  848. "error": "Product not found in database"
  849. })
  850. logger.warning(f"Product {item_id} not found in database. Skipping extraction.")
  851. continue
  852. product = product_map[item_id]
  853. try:
  854. title = product.product_name
  855. short_desc = product.product_short_description
  856. long_desc = product.product_long_description
  857. image_url = product.image_path
  858. ocr_results = None
  859. ocr_text = None
  860. visual_results = None
  861. if process_image and image_url:
  862. logger.info(f"Processing image for product {item_id}...")
  863. # OCR Processing
  864. ocr_service = OCRService()
  865. ocr_results = ocr_service.process_image(image_url)
  866. if ocr_results and ocr_results.get("detected_text"):
  867. ocr_attrs = ProductAttributeService.extract_attributes_from_ocr(
  868. ocr_results, model
  869. )
  870. ocr_results["extracted_attributes"] = ocr_attrs
  871. ocr_text = "\n".join([
  872. f"{item['text']} (confidence: {item['confidence']:.2f})"
  873. for item in ocr_results["detected_text"]
  874. ])
  875. # Visual Processing
  876. visual_service = VisualProcessingService()
  877. product_type_hint = product.product_type if product.product_type else None
  878. visual_results = visual_service.process_image(image_url, product_type_hint)
  879. if visual_results and visual_results.get('visual_attributes'):
  880. visual_results['visual_attributes'] = ProductAttributeService.format_visual_attributes(
  881. visual_results['visual_attributes']
  882. )
  883. logger.info(f"Image processing done for product {item_id}.")
  884. # Combine product text with source tracking
  885. product_text, source_map = ProductAttributeService.combine_product_text(
  886. title=title,
  887. short_desc=short_desc,
  888. long_desc=long_desc,
  889. ocr_text=ocr_text
  890. )
  891. # Attribute Extraction with source tracking
  892. extracted = ProductAttributeService.extract_attributes(
  893. product_text=product_text,
  894. mandatory_attrs=mandatory_attrs,
  895. source_map=source_map,
  896. model=model,
  897. extract_additional=extract_additional,
  898. multiple=multiple,
  899. threshold_abs=threshold_abs,
  900. margin=margin,
  901. use_dynamic_thresholds=use_dynamic_thresholds,
  902. use_adaptive_margin=use_adaptive_margin,
  903. use_semantic_clustering=use_semantic_clustering
  904. )
  905. result = {
  906. "product_id": item_id,
  907. "mandatory": extracted.get("mandatory", {}),
  908. "additional": extracted.get("additional", {}),
  909. }
  910. if ocr_results:
  911. result["ocr_results"] = ocr_results
  912. if visual_results:
  913. result["visual_results"] = visual_results
  914. results.append(result)
  915. successful += 1
  916. logger.info(f"Attribute extraction successful for product {item_id}.")
  917. except Exception as e:
  918. failed += 1
  919. results.append({
  920. "product_id": item_id,
  921. "error": str(e)
  922. })
  923. logger.exception(f"Error during attribute extraction for product {item_id}.")
  924. logger.info(f"Batch extraction phase complete. Successful: {successful}, Failed: {failed}")
  925. # --------------------------------------------------------------------------------
  926. # Generate and save the Excel file
  927. # --------------------------------------------------------------------------------
  928. wb = Workbook()
  929. # Sheet 1: Products (from DB) (Logic is the same, skipped for brevity)
  930. ws_products = wb.active
  931. ws_products.title = "Products"
  932. products_headers = ['ITEM ID', 'PRODUCT NAME', 'PRODUCT TYPE', 'Product Short Description', 'Product Long Description', 'image_path']
  933. header_fill = PatternFill(start_color="366092", end_color="366092", fill_type="solid")
  934. header_font = Font(bold=True, color="FFFFFF")
  935. for col_num, header in enumerate(products_headers, 1):
  936. cell = ws_products.cell(row=1, column=col_num)
  937. cell.value = header
  938. cell.fill = header_fill
  939. cell.font = header_font
  940. cell.alignment = Alignment(horizontal="center", vertical="center")
  941. all_products_db = Product.objects.all()
  942. for row_num, product in enumerate(all_products_db, 2):
  943. ws_products.cell(row=row_num, column=1, value=product.item_id)
  944. ws_products.cell(row=row_num, column=2, value=product.product_name)
  945. ws_products.cell(row=row_num, column=3, value=product.product_type)
  946. ws_products.cell(row=row_num, column=4, value=product.product_short_description)
  947. ws_products.cell(row=row_num, column=5, value=product.product_long_description)
  948. ws_products.cell(row=row_num, column=6, value=product.image_path)
  949. for col_dim, width in zip(['A', 'B', 'C', 'D', 'E', 'F'], [15, 25, 15, 35, 50, 45]):
  950. ws_products.column_dimensions[col_dim].width = width
  951. # Sheet 2: Attribute_values (Logic is the same, skipped for brevity)
  952. ws_attributes = wb.create_sheet("Attribute_values")
  953. attributes_headers = ['item_id', 'attribute_name', 'original_value', 'generated_value']
  954. for col_num, header in enumerate(attributes_headers, 1):
  955. cell = ws_attributes.cell(row=1, column=col_num)
  956. cell.value = header
  957. cell.fill = header_fill
  958. cell.font = header_font
            cell.alignment = Alignment(horizontal="center", vertical="center")
        row_num = 2
        all_original_attrs = ProductAttributeValue.objects.all()
        original_attrs_lookup = {
            (attr.product.item_id, attr.attribute_name): attr.original_value
            for attr in all_original_attrs
        }
        processed_original_keys = set()
        for res in results:
            # ... (Excel writing logic for generated/original attributes remains unchanged)
            item_id = res["product_id"]
            if "error" in res:
                # Extraction failed: still emit the product's original attributes,
                # recording the error in the generated-value column.
                for (orig_item_id, orig_attr_name), orig_value in original_attrs_lookup.items():
                    if orig_item_id == item_id:
                        ws_attributes.cell(row=row_num, column=1, value=orig_item_id)
                        ws_attributes.cell(row=row_num, column=2, value=orig_attr_name)
                        ws_attributes.cell(row=row_num, column=3, value=orig_value)
                        ws_attributes.cell(row=row_num, column=4, value=f"Extraction Failed: {res['error']}")
                        processed_original_keys.add((orig_item_id, orig_attr_name))
                        row_num += 1
                continue
            # Merge generated attributes from the text, OCR, and visual pipelines,
            # keyed by (item_id, attribute_name).
            generated_attrs = {}
            for cat in ["mandatory", "additional"]:
                attrs = res.get(cat, {})
                for attr_name, values in attrs.items():
                    for val in values:
                        key = (item_id, attr_name)
                        generated_attrs.setdefault(key, []).append(f"{val['value']} (source: {val['source']})")
            ocr = res.get("ocr_results")
            if ocr and "extracted_attributes" in ocr and isinstance(ocr["extracted_attributes"], dict):
                for attr_name, values in ocr["extracted_attributes"].items():
                    for val in values:
                        key = (item_id, attr_name)
                        generated_attrs.setdefault(key, []).append(f"{val['value']} (source: {val['source']})")
            visual = res.get("visual_results")
            if visual and "visual_attributes" in visual:
                vis_attrs = visual["visual_attributes"]
                if isinstance(vis_attrs, dict):
                    for attr_name, values in vis_attrs.items():
                        if not isinstance(values, list):
                            values = [{"value": values, "source": "visual"}]
                        for val in values:
                            key = (item_id, attr_name)
                            generated_attrs.setdefault(key, []).append(f"{val['value']} (source: {val.get('source', 'visual')})")
                elif isinstance(vis_attrs, list):
                    for item in vis_attrs:
                        attr_name = item.get("attribute_name") or item.get("name")
                        if not attr_name:
                            continue
                        value = item.get("value", "")
                        source = item.get("source", "visual")
                        key = (item_id, attr_name)
                        generated_attrs.setdefault(key, []).append(f"{value} (source: {source})")
            # One row per (item_id, attribute): original value next to generated values.
            for (attr_item_id, attr_name), gen_values in generated_attrs.items():
                original_value = original_attrs_lookup.get((attr_item_id, attr_name), "")
                generated_value = "; ".join(gen_values) if gen_values else ""
                ws_attributes.cell(row=row_num, column=1, value=attr_item_id)
                ws_attributes.cell(row=row_num, column=2, value=attr_name)
                ws_attributes.cell(row=row_num, column=3, value=original_value)
                ws_attributes.cell(row=row_num, column=4, value=generated_value)
                processed_original_keys.add((attr_item_id, attr_name))
                row_num += 1
            # Original attributes of this product that produced no generated value.
            for (orig_item_id, orig_attr_name), orig_value in original_attrs_lookup.items():
                if orig_item_id == item_id and (orig_item_id, orig_attr_name) not in processed_original_keys:
                    ws_attributes.cell(row=row_num, column=1, value=orig_item_id)
                    ws_attributes.cell(row=row_num, column=2, value=orig_attr_name)
                    ws_attributes.cell(row=row_num, column=3, value=orig_value)
                    ws_attributes.cell(row=row_num, column=4, value="")
                    processed_original_keys.add((orig_item_id, orig_attr_name))
                    row_num += 1
        # Finally, original attributes of products that were not processed in this batch.
        for (orig_item_id, orig_attr_name), orig_value in original_attrs_lookup.items():
            if (orig_item_id, orig_attr_name) not in processed_original_keys:
                ws_attributes.cell(row=row_num, column=1, value=orig_item_id)
                ws_attributes.cell(row=row_num, column=2, value=orig_attr_name)
                ws_attributes.cell(row=row_num, column=3, value=orig_value)
                ws_attributes.cell(row=row_num, column=4, value="Original value only (Product not processed in batch)")
                row_num += 1
        for col_dim, width in zip(['A', 'B', 'C', 'D'], [15, 35, 50, 50]):
            ws_attributes.column_dimensions[col_dim].width = width
        # FIX: Save to the new EXCEL_FILE_PATH
        wb.save(EXCEL_FILE_PATH)
        logger.info(f"Excel file successfully saved to {EXCEL_FILE_PATH}")
        # Write SUCCESS status
        write_status("SUCCESS")
        logger.info("Background task finished successfully.")
    except Exception as e:
        # Log the critical error and write FAILED status
        logger.exception("CRITICAL ERROR during background Excel generation process.")
        write_status("FAILED", error_msg=str(e))
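
# --- Usage sketch (added for illustration, not part of the original API) ---------------
# How the background task above is meant to be launched, per the commented-out trigger
# in the legacy view below: a daemon thread lets the HTTP request return immediately
# while clients poll the status file maintained by write_status(). This assumes the
# enclosing function is generate_product_excel_background, as that trigger references.
def _example_trigger_excel_generation():
    import threading
    thread = threading.Thread(target=generate_product_excel_background, daemon=True)
    thread.start()
    return thread
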
# -------------------------------------------------------------------------------------------------
# LEGACY VERSION: THIS DOES NOT DELETE A RECORD EVEN IF THE RECORD IS ABSENT FROM THE EXCEL FILE
# class ProductUploadExcelView(APIView):
#     """
#     POST API to upload an Excel file.
#     """
#     parser_classes = (MultiPartParser, FormParser)
#
#     def post(self, request, *args, **kwargs):
#         file_obj = request.FILES.get('file')
#         if not file_obj:
#             return Response({'error': 'No file provided'}, status=status.HTTP_400_BAD_REQUEST)
#         try:
#             # ... (Upload and DB processing logic remains unchanged)
#             # Read all sheets from Excel file
#             excel_file = pd.ExcelFile(file_obj)
#             # Check if required sheets exist
#             if 'Products' not in excel_file.sheet_names:
#                 logger.error("Upload failed: Missing 'Products' sheet in file.")
#                 return Response({
#                     'error': "Missing 'Products' sheet",
#                     'available_sheets': excel_file.sheet_names
#                 }, status=status.HTTP_400_BAD_REQUEST)
#             df_products = pd.read_excel(excel_file, sheet_name='Products')
#             df_products.columns = [c.strip().lower().replace(' ', '_') for c in df_products.columns]
#             expected_product_cols = {
#                 'item_id', 'product_name', 'product_long_description',
#                 'product_short_description', 'product_type', 'image_path'
#             }
#             if not expected_product_cols.issubset(df_products.columns):
#                 logger.error("Upload failed: Missing required columns in Products sheet.")
#                 return Response({
#                     'error': 'Missing required columns in Products sheet',
#                     'required_columns': list(expected_product_cols),
#                     'found_columns': list(df_products.columns)
#                 }, status=status.HTTP_400_BAD_REQUEST)
#             df_attributes = None
#             has_attributes_sheet = 'Attribute_values' in excel_file.sheet_names
#             if has_attributes_sheet:
#                 df_attributes = pd.read_excel(excel_file, sheet_name='Attribute_values')
#                 df_attributes.columns = [c.strip().lower().replace(' ', '_') for c in df_attributes.columns]
#                 expected_attr_cols = {'item_id', 'attribute_name', 'original_value'}
#                 if not expected_attr_cols.issubset(df_attributes.columns):
#                     logger.error("Upload failed: Missing required columns in Attribute_values sheet.")
#                     return Response({
#                         'error': 'Missing required columns in Attribute_values sheet',
#                         'required_columns': list(expected_attr_cols),
#                         'found_columns': list(df_attributes.columns)
#                     }, status=status.HTTP_400_BAD_REQUEST)
#             products_created = 0
#             products_updated = 0
#             attributes_created = 0
#             attributes_updated = 0
#             products_failed = 0
#             attributes_failed = 0
#             errors = []
#             with transaction.atomic():
#                 for idx, row in df_products.iterrows():
#                     item_id = str(row.get('item_id', '')).strip()
#                     product_type = str(row.get('product_type', '')).strip()
#                     if not item_id:
#                         products_failed += 1
#                         errors.append(f"Products Row {idx + 2}: Missing item_id")
#                         continue
#                     try:
#                         if product_type:
#                             ProductType.objects.get_or_create(name=product_type)
#                         defaults = {
#                             'product_name': str(row.get('product_name', '')),
#                             'product_long_description': str(row.get('product_long_description', '')),
#                             'product_short_description': str(row.get('product_short_description', '')),
#                             'product_type': product_type,
#                             'image_path': str(row.get('image_path', '')),
#                         }
#                         obj, created = Product.objects.update_or_create(item_id=item_id, defaults=defaults)
#                         if created:
#                             products_created += 1
#                         else:
#                             products_updated += 1
#                     except Exception as e:
#                         products_failed += 1
#                         errors.append(f"Products Row {idx + 2} (item_id: {item_id}): {str(e)}")
#                         logger.error(f"Error processing product {item_id} in Products sheet: {e}")
#                 if has_attributes_sheet and df_attributes is not None:
#                     item_ids_in_attrs = df_attributes['item_id'].astype(str).unique()
#                     existing_products = {p.item_id: p for p in Product.objects.filter(item_id__in=item_ids_in_attrs)}
#                     for idx, row in df_attributes.iterrows():
#                         item_id = str(row.get('item_id', '')).strip()
#                         attribute_name = str(row.get('attribute_name', '')).strip()
#                         original_value = str(row.get('original_value', '')).strip()
#                         if not item_id or not attribute_name:
#                             attributes_failed += 1
#                             errors.append(f"Attribute_values Row {idx + 2}: Missing item_id or attribute_name")
#                             continue
#                         product = existing_products.get(item_id)
#                         if not product:
#                             attributes_failed += 1
#                             errors.append(f"Attribute_values Row {idx + 2}: Product with item_id '{item_id}' not found. Make sure it exists in Products sheet.")
#                             continue
#                         try:
#                             attr_obj, created = ProductAttributeValue.objects.update_or_create(
#                                 product=product,
#                                 attribute_name=attribute_name,
#                                 defaults={'original_value': original_value}
#                             )
#                             if created:
#                                 attributes_created += 1
#                             else:
#                                 attributes_updated += 1
#                         except Exception as e:
#                             attributes_failed += 1
#                             errors.append(f"Attribute_values Row {idx + 2} (item_id: {item_id}, attribute: {attribute_name}): {str(e)}")
#                             logger.error(f"Error processing attribute {attribute_name} for product {item_id}: {e}")
#             # Prepare response data
#             response_data = {
#                 'message': 'Upload completed',
#                 'products': {
#                     'created': products_created, 'updated': products_updated, 'failed': products_failed,
#                     'total_processed': products_created + products_updated + products_failed
#                 },
#                 'attribute_values': {
#                     'created': attributes_created, 'updated': attributes_updated, 'failed': attributes_failed,
#                     'total_processed': attributes_created + attributes_updated + attributes_failed
#                 } if has_attributes_sheet else {'message': 'Attribute_values sheet not found in Excel file'},
#                 'generated_excel_status': 'Excel generation started in the background.'
#             }
#             if errors:
#                 response_data['errors'] = errors[:50]
#                 if len(errors) > 50:
#                     response_data['errors'].append(f"... and {len(errors) - 50} more errors")
#             upload_status = status.HTTP_201_CREATED if products_failed == 0 and attributes_failed == 0 else status.HTTP_207_MULTI_STATUS
#             # Start background thread for Excel generation if upload was successful
#             if products_failed == 0 and attributes_failed == 0:
#                 logger.info("API call successful. Background Excel generation trigger is commented out for now.")
#                 # threading.Thread(target=generate_product_excel_background, daemon=True).start()
#                 ## FIX: Update monitoring URLs to point to the new generated_outputs subfolder
#                 # response_data['generated_excel_status'] = 'Background Excel generation triggered successfully.'
#                 # response_data['monitoring'] = {
#                 #     'excel_file': os.path.join(OUTPUT_URL, EXCEL_FILE_NAME),
#                 #     'status_file': os.path.join(OUTPUT_URL, STATUS_FILE_NAME),
#                 #     'log_file': os.path.join(OUTPUT_URL, LOG_FILE_NAME),
#                 #     'note': 'These files will be available once the background process completes.'
#                 # }
#             else:
#                 logger.warning(f"API call finished with errors ({products_failed} products, {attributes_failed} attributes). Not triggering background Excel generation.")
#                 response_data['generated_excel_status'] = 'Background Excel generation was NOT triggered due to upload errors. Fix upload errors and re-upload.'
#             return Response(response_data, status=upload_status)
#         except pd.errors.EmptyDataError:
#             logger.error('The uploaded Excel file is empty or invalid.')
#             return Response({'error': 'The uploaded Excel file is empty or invalid'}, status=status.HTTP_400_BAD_REQUEST)
#         except Exception as e:
#             logger.exception('An unexpected error occurred while processing the file.')
#             return Response({'error': f'An unexpected error occurred while processing the file: {str(e)}'}, status=status.HTTP_500_INTERNAL_SERVER_ERROR)

# THIS MAKES THE DB IN SYNC WITH THE EXCEL. IF A PRODUCT IS NOT PRESENT IT GETS DELETED.
from rest_framework.views import APIView
from rest_framework.response import Response
from rest_framework import status
from rest_framework.parsers import MultiPartParser, FormParser
from django.db import transaction
import pandas as pd
import logging
import os
# import threading  # Uncomment if you use background Excel generation

from .models import Product, ProductType, ProductAttributeValue

logger = logging.getLogger(__name__)


class ProductUploadExcelView(APIView):
    """
    POST API to upload an Excel file and synchronize Products & Attributes with the DB.
    If a product is missing from the Excel file, it is deleted from the database.
    """
    parser_classes = (MultiPartParser, FormParser)

    def post(self, request, *args, **kwargs):
        file_obj = request.FILES.get('file')
        if not file_obj:
            return Response({'error': 'No file provided'}, status=status.HTTP_400_BAD_REQUEST)
        try:
            # Read all sheets
            excel_file = pd.ExcelFile(file_obj)
            if 'Products' not in excel_file.sheet_names:
                logger.error("Missing 'Products' sheet in uploaded file.")
                return Response({
                    'error': "Missing 'Products' sheet",
                    'available_sheets': excel_file.sheet_names
                }, status=status.HTTP_400_BAD_REQUEST)
            df_products = pd.read_excel(excel_file, sheet_name='Products')
            df_products.columns = [c.strip().lower().replace(' ', '_') for c in df_products.columns]
            expected_product_cols = {
                'item_id', 'product_name', 'product_long_description',
                'product_short_description', 'product_type', 'image_path'
            }
            if not expected_product_cols.issubset(df_products.columns):
                logger.error("Missing required columns in Products sheet.")
                return Response({
                    'error': 'Missing required columns in Products sheet',
                    'required_columns': list(expected_product_cols),
                    'found_columns': list(df_products.columns)
                }, status=status.HTTP_400_BAD_REQUEST)
            # Optional attributes sheet
            has_attributes_sheet = 'Attribute_values' in excel_file.sheet_names
            df_attributes = None
            if has_attributes_sheet:
                df_attributes = pd.read_excel(excel_file, sheet_name='Attribute_values')
                df_attributes.columns = [c.strip().lower().replace(' ', '_') for c in df_attributes.columns]
                expected_attr_cols = {'item_id', 'attribute_name', 'original_value'}
                if not expected_attr_cols.issubset(df_attributes.columns):
                    logger.error("Missing required columns in Attribute_values sheet.")
                    return Response({
                        'error': 'Missing required columns in Attribute_values sheet',
                        'required_columns': list(expected_attr_cols),
                        'found_columns': list(df_attributes.columns)
                    }, status=status.HTTP_400_BAD_REQUEST)
            products_created = 0
            products_updated = 0
            products_deleted = 0
            attributes_created = 0
            attributes_updated = 0
            attributes_deleted = 0
            products_failed = 0
            attributes_failed = 0
            errors = []
            with transaction.atomic():
                # -------------------------------
                # 🔥 TRUE SYNC: Delete missing products
                # -------------------------------
                existing_item_ids = set(Product.objects.values_list('item_id', flat=True))
                # Strip whitespace so the deletion set matches the ids upserted below.
                uploaded_item_ids = set(df_products['item_id'].astype(str).str.strip())
                to_delete = existing_item_ids - uploaded_item_ids
                if to_delete:
                    deleted_count, _ = Product.objects.filter(item_id__in=to_delete).delete()
                    products_deleted += deleted_count
                    logger.info(f"Deleted {deleted_count} products missing in Excel.")
                # -------------------------------
                # ✅ Create or update products
                # -------------------------------
                for idx, row in df_products.iterrows():
                    item_id = str(row.get('item_id', '')).strip()
                    product_type = str(row.get('product_type', '')).strip()
                    if not item_id:
                        products_failed += 1
                        errors.append(f"Products Row {idx + 2}: Missing item_id")
                        continue
                    try:
                        if product_type:
                            ProductType.objects.get_or_create(name=product_type)
                        defaults = {
                            'product_name': str(row.get('product_name', '')),
                            'product_long_description': str(row.get('product_long_description', '')),
                            'product_short_description': str(row.get('product_short_description', '')),
                            'product_type': product_type,
                            'image_path': str(row.get('image_path', '')),
                        }
                        obj, created = Product.objects.update_or_create(item_id=item_id, defaults=defaults)
                        if created:
                            products_created += 1
                        else:
                            products_updated += 1
                    except Exception as e:
                        products_failed += 1
                        errors.append(f"Products Row {idx + 2} (item_id: {item_id}): {str(e)}")
                        logger.error(f"Error processing product {item_id}: {e}")
                # -------------------------------
                # ✅ Handle attributes (optional)
                # -------------------------------
                if has_attributes_sheet and df_attributes is not None:
                    item_ids_in_attrs = df_attributes['item_id'].astype(str).unique()
                    existing_products = {p.item_id: p for p in Product.objects.filter(item_id__in=item_ids_in_attrs)}
                    # 🔥 TRUE SYNC for attributes: delete attributes linked to deleted products.
                    # Note: with an on_delete=CASCADE FK these rows are already gone after the
                    # product deletion above, in which case this count stays at zero.
                    if to_delete:
                        deleted_attr_count, _ = ProductAttributeValue.objects.filter(product__item_id__in=to_delete).delete()
                        attributes_deleted += deleted_attr_count
                        logger.info(f"Deleted {deleted_attr_count} attributes linked to removed products.")
                    for idx, row in df_attributes.iterrows():
                        item_id = str(row.get('item_id', '')).strip()
                        attribute_name = str(row.get('attribute_name', '')).strip()
                        original_value = str(row.get('original_value', '')).strip()
                        if not item_id or not attribute_name:
                            attributes_failed += 1
                            errors.append(f"Attribute_values Row {idx + 2}: Missing item_id or attribute_name")
                            continue
                        product = existing_products.get(item_id)
                        if not product:
                            attributes_failed += 1
                            errors.append(f"Attribute_values Row {idx + 2}: Product '{item_id}' not found.")
                            continue
                        try:
                            attr_obj, created = ProductAttributeValue.objects.update_or_create(
                                product=product,
                                attribute_name=attribute_name,
                                defaults={'original_value': original_value}
                            )
                            if created:
                                attributes_created += 1
                            else:
                                attributes_updated += 1
                        except Exception as e:
                            attributes_failed += 1
                            errors.append(f"Attribute_values Row {idx + 2} (item_id: {item_id}, attribute: {attribute_name}): {str(e)}")
                            logger.error(f"Error processing attribute {attribute_name} for {item_id}: {e}")
            # -------------------------------
            # ✅ Prepare response
            # -------------------------------
            response_data = {
                'message': 'Upload and synchronization completed',
                'products': {
                    'created': products_created,
                    'updated': products_updated,
                    'deleted': products_deleted,
                    'failed': products_failed,
                    'total_processed': products_created + products_updated + products_deleted + products_failed
                },
                'attribute_values': {
                    'created': attributes_created,
                    'updated': attributes_updated,
                    'deleted': attributes_deleted,
                    'failed': attributes_failed,
                    'total_processed': attributes_created + attributes_updated + attributes_deleted + attributes_failed
                } if has_attributes_sheet else {'message': 'Attribute_values sheet not found in Excel file'},
                'generated_excel_status': 'Excel generation skipped (true sync mode).'
            }
            if errors:
                response_data['errors'] = errors[:50]
                if len(errors) > 50:
                    response_data['errors'].append(f"... and {len(errors) - 50} more errors")
            upload_status = status.HTTP_201_CREATED if (products_failed == 0 and attributes_failed == 0) else status.HTTP_207_MULTI_STATUS
            return Response(response_data, status=upload_status)
        except pd.errors.EmptyDataError:
            logger.error('The uploaded Excel file is empty or invalid.')
            return Response({'error': 'The uploaded Excel file is empty or invalid'}, status=status.HTTP_400_BAD_REQUEST)
        except Exception as e:
            logger.exception('Unexpected error while processing Excel file.')
            return Response({'error': f'Unexpected error: {str(e)}'}, status=status.HTTP_500_INTERNAL_SERVER_ERROR)
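
# --- Usage sketch (added for illustration) ----------------------------------------------
# A minimal client for the sync upload above, using the third-party `requests` library.
# The URL argument is hypothetical; the real route is whatever urls.py maps to
# ProductUploadExcelView, which is not shown in this file.
def _example_sync_upload(url, xlsx_path):
    import requests
    with open(xlsx_path, 'rb') as fh:
        resp = requests.post(url, files={'file': fh})
    # 201 = clean sync; 207 = partial success with per-row errors listed in the body
    return resp.status_code, resp.json()
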
class DownloadExcelTemplateView(APIView):
    """
    GET API to download an Excel template with two sheets:
    1. Products sheet with sample data
    2. Attribute_values sheet with sample data
    """

    def get(self, request):
        # Create a new workbook
        wb = Workbook()
        # Remove default sheet
        if 'Sheet' in wb.sheetnames:
            wb.remove(wb['Sheet'])
        # ===== Create Products Sheet =====
        ws_products = wb.create_sheet("Products", 0)
        # Define headers for Products
        products_headers = [
            'ITEM ID',
            'PRODUCT NAME',
            'PRODUCT TYPE',
            'Product Short Description',
            'Product Long Description',
            'image_path'
        ]
        # Style for headers
        header_fill = PatternFill(start_color="366092", end_color="366092", fill_type="solid")
        header_font = Font(bold=True, color="FFFFFF")
        # Add headers to Products sheet
        for col_num, header in enumerate(products_headers, 1):
            cell = ws_products.cell(row=1, column=col_num)
            cell.value = header
            cell.fill = header_fill
            cell.font = header_font
            cell.alignment = Alignment(horizontal="center", vertical="center")
        # Add sample data to Products sheet
        sample_products = [
            [
                '3217373735',
                'Blue V-Neck T-Shirt',
                'Clothing',
                'Stylish blue t-shirt with v-neck design',
                'Premium quality cotton t-shirt featuring a classic v-neck design. Perfect for casual wear. Available in vibrant blue color.',
                'https://images.unsplash.com/photo-1521572163474-6864f9cf17ab'
            ],
            [
                '1234567890',
                'Red Cotton Dress',
                'Clothing',
                'Beautiful red dress for special occasions',
                'Elegant red dress made from 100% cotton fabric. Features a flowing design perfect for summer events and parties.',
                'https://images.unsplash.com/photo-1595777457583-95e059d581b8'
            ],
            [
                '9876543210',
                'Steel Screws Pack',
                'Hardware',
                'Pack of zinc plated steel screws',
                'Professional grade steel screws with zinc plating for corrosion resistance. Pack contains 50 pieces, 2 inch length, M6 thread size.',
                'https://images.unsplash.com/photo-1542272604-787c3835535d'
            ]
        ]
        for row_num, row_data in enumerate(sample_products, 2):
            for col_num, value in enumerate(row_data, 1):
                ws_products.cell(row=row_num, column=col_num, value=value)
        # Adjust column widths for Products sheet
        ws_products.column_dimensions['A'].width = 15  # ITEM ID
        ws_products.column_dimensions['B'].width = 25  # PRODUCT NAME
        ws_products.column_dimensions['C'].width = 15  # PRODUCT TYPE
        ws_products.column_dimensions['D'].width = 35  # Short Description
        ws_products.column_dimensions['E'].width = 50  # Long Description
        ws_products.column_dimensions['F'].width = 45  # image_path
        # ===== Create Attribute_values Sheet =====
        ws_attributes = wb.create_sheet("Attribute_values", 1)
        # Define headers for Attribute_values
        attributes_headers = ['item_id', 'attribute_name', 'original_value']
        # Add headers to Attribute_values sheet
        for col_num, header in enumerate(attributes_headers, 1):
            cell = ws_attributes.cell(row=1, column=col_num)
            cell.value = header
            cell.fill = header_fill
            cell.font = header_font
            cell.alignment = Alignment(horizontal="center", vertical="center")
        # Add sample data to Attribute_values sheet
        sample_attributes = [
            ['3217373735', 'Clothing Neck Style', 'V-Neck Square'],
            ['3217373735', 'Condition', 'New with tags'],
            ['3217373735', 'Material', '100% Cotton'],
            ['3217373735', 'Color', 'Sky Blue'],
            ['3217373735', 'Size', 'Medium'],
            ['1234567890', 'Sleeve Length', 'Sleeveless'],
            ['1234567890', 'Condition', 'Brand New'],
            ['1234567890', 'Pattern', 'Solid'],
            ['1234567890', 'Material', 'Cotton Blend'],
            ['1234567890', 'Color', 'Crimson Red'],
            ['9876543210', 'Material', 'Stainless Steel'],
            ['9876543210', 'Thread Size', 'M6'],
            ['9876543210', 'Length', '2 inches'],
            ['9876543210', 'Coating', 'Zinc Plated'],
            ['9876543210', 'Package Quantity', '50 pieces'],
        ]
        for row_num, row_data in enumerate(sample_attributes, 2):
            for col_num, value in enumerate(row_data, 1):
                ws_attributes.cell(row=row_num, column=col_num, value=value)
        # Adjust column widths for Attribute_values sheet
        ws_attributes.column_dimensions['A'].width = 15  # item_id
        ws_attributes.column_dimensions['B'].width = 25  # attribute_name
        ws_attributes.column_dimensions['C'].width = 30  # original_value
        # Add instructions sheet
        ws_instructions = wb.create_sheet("Instructions", 2)
        instructions_text = [
            ['Excel Upload Instructions', ''],
            ['', ''],
            ['Sheet 1: Products', ''],
            ['- Contains product basic information', ''],
            ['- All columns are required', ''],
            ['- ITEM ID must be unique', ''],
            ['', ''],
            ['Sheet 2: Attribute_values', ''],
            ['- Contains original/manual attribute values', ''],
            ['- item_id must match an ITEM ID from Products sheet', ''],
            ['- Multiple rows can have the same item_id (for different attributes)', ''],
            ['- Each attribute per product should be on a separate row', ''],
            ['', ''],
            ['Upload Process:', ''],
            ['1. Fill in your product data in the Products sheet', ''],
            ['2. Fill in attribute values in the Attribute_values sheet', ''],
            ['3. Ensure item_id values match between both sheets', ''],
            ['4. Save the file and upload via API', ''],
            ['', ''],
            ['Notes:', ''],
            ['- Do not change sheet names (must be "Products" and "Attribute_values")', ''],
            ['- Do not change column header names', ''],
            ['- You can delete the sample data rows', ''],
            ['- You can delete this Instructions sheet before uploading', ''],
        ]
        for row_num, row_data in enumerate(instructions_text, 1):
            ws_instructions.cell(row=row_num, column=1, value=row_data[0])
            if row_num == 1:
                cell = ws_instructions.cell(row=row_num, column=1)
                cell.font = Font(bold=True, size=14)
        ws_instructions.column_dimensions['A'].width = 60
        # Save to BytesIO
        output = io.BytesIO()
        wb.save(output)
        output.seek(0)
        # Create response
        response = HttpResponse(
            output.getvalue(),
            content_type='application/vnd.openxmlformats-officedocument.spreadsheetml.sheet'
        )
        response['Content-Disposition'] = 'attachment; filename=product_upload_template.xlsx'
        return response
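
# --- Usage sketch (added for illustration) ----------------------------------------------
# Fetching the template above and saving it to disk. The URL argument is hypothetical;
# the real route to DownloadExcelTemplateView lives in urls.py.
def _example_download_template(url, out_path='product_upload_template.xlsx'):
    import requests
    resp = requests.get(url)
    resp.raise_for_status()
    with open(out_path, 'wb') as fh:
        fh.write(resp.content)
    return out_path
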
class DownloadProductsWithAttributesExcelView(APIView):
    """
    GET API to download existing products with their attribute values as Excel.
    Useful for users to update existing data.
    """

    def get(self, request):
        from .models import Product, ProductAttributeValue
        # Create workbook
        wb = Workbook()
        if 'Sheet' in wb.sheetnames:
            wb.remove(wb['Sheet'])
        # ===== Products Sheet =====
        ws_products = wb.create_sheet("Products", 0)
        # Headers
        products_headers = [
            'ITEM ID', 'PRODUCT NAME', 'PRODUCT TYPE',
            'Product Short Description', 'Product Long Description', 'image_path'
        ]
        header_fill = PatternFill(start_color="366092", end_color="366092", fill_type="solid")
        header_font = Font(bold=True, color="FFFFFF")
        for col_num, header in enumerate(products_headers, 1):
            cell = ws_products.cell(row=1, column=col_num)
            cell.value = header
            cell.fill = header_fill
            cell.font = header_font
            cell.alignment = Alignment(horizontal="center", vertical="center")
        # Fetch and add product data
        products = Product.objects.all()
        for row_num, product in enumerate(products, 2):
            ws_products.cell(row=row_num, column=1, value=product.item_id)
            ws_products.cell(row=row_num, column=2, value=product.product_name)
            ws_products.cell(row=row_num, column=3, value=product.product_type)
            ws_products.cell(row=row_num, column=4, value=product.product_short_description)
            ws_products.cell(row=row_num, column=5, value=product.product_long_description)
            ws_products.cell(row=row_num, column=6, value=product.image_path)
        # Adjust widths
        ws_products.column_dimensions['A'].width = 15
        ws_products.column_dimensions['B'].width = 25
        ws_products.column_dimensions['C'].width = 15
        ws_products.column_dimensions['D'].width = 35
        ws_products.column_dimensions['E'].width = 50
        ws_products.column_dimensions['F'].width = 45
        # ===== Attribute_values Sheet =====
        ws_attributes = wb.create_sheet("Attribute_values", 1)
        attributes_headers = ['item_id', 'attribute_name', 'original_value']
        for col_num, header in enumerate(attributes_headers, 1):
            cell = ws_attributes.cell(row=1, column=col_num)
            cell.value = header
            cell.fill = header_fill
            cell.font = header_font
            cell.alignment = Alignment(horizontal="center", vertical="center")
        # Fetch and add attribute values
        attributes = ProductAttributeValue.objects.select_related('product').all()
        for row_num, attr in enumerate(attributes, 2):
            ws_attributes.cell(row=row_num, column=1, value=attr.product.item_id)
            ws_attributes.cell(row=row_num, column=2, value=attr.attribute_name)
            ws_attributes.cell(row=row_num, column=3, value=attr.original_value)
        ws_attributes.column_dimensions['A'].width = 15
        ws_attributes.column_dimensions['B'].width = 25
        ws_attributes.column_dimensions['C'].width = 30
        # Save to BytesIO
        output = io.BytesIO()
        wb.save(output)
        output.seek(0)
        response = HttpResponse(
            output.getvalue(),
            content_type='application/vnd.openxmlformats-officedocument.spreadsheetml.sheet'
        )
        response['Content-Disposition'] = 'attachment; filename=products_export.xlsx'
        return response

# LEGACY VERSION: additive only, no deletion of attributes missing from the upload.
# class ProductAttributesUploadView(APIView):
#     """
#     POST API to upload an Excel file and add mandatory/additional attributes
#     for product types with possible values.
#     """
#     parser_classes = (MultiPartParser, FormParser)
#
#     def post(self, request):
#         file_obj = request.FILES.get('file')
#         if not file_obj:
#             return Response({"error": "No file provided."}, status=status.HTTP_400_BAD_REQUEST)
#         try:
#             df = pd.read_excel(file_obj)
#             required_columns = {'product_type', 'attribute_name', 'is_mandatory', 'possible_values'}
#             if not required_columns.issubset(df.columns):
#                 return Response({
#                     "error": f"Missing required columns. Found: {list(df.columns)}"
#                 }, status=status.HTTP_400_BAD_REQUEST)
#             for _, row in df.iterrows():
#                 product_type_name = str(row['product_type']).strip()
#                 attr_name = str(row['attribute_name']).strip()
#                 is_mandatory = str(row['is_mandatory']).strip().lower() in ['yes', 'true', '1']
#                 possible_values = str(row.get('possible_values', '')).strip()
#                 # Get or create product type
#                 product_type, _ = ProductType.objects.get_or_create(name=product_type_name)
#                 # Get or create attribute
#                 attribute, _ = ProductAttribute.objects.get_or_create(
#                     product_type=product_type,
#                     name=attr_name,
#                     defaults={'is_mandatory': is_mandatory}
#                 )
#                 attribute.is_mandatory = is_mandatory
#                 attribute.save()
#                 # Handle possible values
#                 AttributePossibleValue.objects.filter(attribute=attribute).delete()
#                 if possible_values:
#                     for val in [v.strip() for v in possible_values.split(',') if v.strip()]:
#                         AttributePossibleValue.objects.create(attribute=attribute, value=val)
#             return Response({"message": "Attributes uploaded successfully."}, status=status.HTTP_201_CREATED)
#         except Exception as e:
#             return Response({"error": str(e)}, status=status.HTTP_500_INTERNAL_SERVER_ERROR)

from rest_framework.views import APIView
from rest_framework.response import Response
from rest_framework import status
from rest_framework.parsers import MultiPartParser, FormParser
import pandas as pd

from .models import ProductType, ProductAttribute, AttributePossibleValue


class ProductAttributesUploadView(APIView):
    """
    POST API to upload an Excel file and synchronize attributes for each product type.
    Adds, updates, and deletes attributes and their possible values.
    """
    parser_classes = (MultiPartParser, FormParser)

    def post(self, request):
        file_obj = request.FILES.get('file')
        if not file_obj:
            return Response({"error": "No file provided."}, status=status.HTTP_400_BAD_REQUEST)
        try:
            df = pd.read_excel(file_obj)
            required_columns = {'product_type', 'attribute_name', 'is_mandatory', 'possible_values'}
            if not required_columns.issubset(df.columns):
                return Response({
                    "error": f"Missing required columns. Found: {list(df.columns)}"
                }, status=status.HTTP_400_BAD_REQUEST)
            # Normalize data
            df['product_type'] = df['product_type'].astype(str).str.strip()
            df['attribute_name'] = df['attribute_name'].astype(str).str.strip()
            # Track processed attributes per product type
            processed_attrs = {}
            # Group by product type for deletion logic
            for product_type_name, group_df in df.groupby('product_type'):
                product_type, _ = ProductType.objects.get_or_create(name=product_type_name)
                uploaded_attrs = set(group_df['attribute_name'].tolist())
                # --- 🧹 Delete attributes that are NOT in the uploaded file ---
                existing_attrs = set(
                    ProductAttribute.objects.filter(product_type=product_type)
                    .values_list('name', flat=True)
                )
                to_delete = existing_attrs - uploaded_attrs
                if to_delete:
                    ProductAttribute.objects.filter(
                        product_type=product_type,
                        name__in=to_delete
                    ).delete()
                # --- 🆕 Create / update remaining attributes ---
                for _, row in group_df.iterrows():
                    attr_name = str(row['attribute_name']).strip()
                    is_mandatory = str(row['is_mandatory']).strip().lower() in ['yes', 'true', '1']
                    # Empty Excel cells arrive as NaN; guard so we don't store the literal string 'nan'.
                    raw_values = row.get('possible_values', '')
                    possible_values = '' if pd.isna(raw_values) else str(raw_values).strip()
                    attribute, _ = ProductAttribute.objects.get_or_create(
                        product_type=product_type,
                        name=attr_name,
                        defaults={'is_mandatory': is_mandatory}
                    )
                    # Update mandatory flag
                    attribute.is_mandatory = is_mandatory
                    attribute.save()
                    # Replace possible values
                    AttributePossibleValue.objects.filter(attribute=attribute).delete()
                    if possible_values:
                        for val in [v.strip() for v in possible_values.split(',') if v.strip()]:
                            AttributePossibleValue.objects.create(attribute=attribute, value=val)
                processed_attrs[product_type_name] = {
                    "uploaded": list(uploaded_attrs),
                    "deleted": list(to_delete)
                }
            return Response({
                "message": "Attributes synchronized successfully.",
                "summary": processed_attrs
            }, status=status.HTTP_201_CREATED)
        except Exception as e:
            return Response({"error": str(e)}, status=status.HTTP_500_INTERNAL_SERVER_ERROR)
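
# --- Usage sketch (added for illustration) ----------------------------------------------
# Building a valid input file for ProductAttributesUploadView with pandas. The column
# names match the required_columns check above; possible_values is a comma-separated
# string, and the file path and sample rows are hypothetical.
def _example_build_attributes_sheet(path='attributes.xlsx'):
    import pandas as pd
    df = pd.DataFrame([
        {'product_type': 'Clothing', 'attribute_name': 'Color',
         'is_mandatory': 'Yes', 'possible_values': 'Red, Blue, Green'},
        {'product_type': 'Clothing', 'attribute_name': 'Pattern',
         'is_mandatory': 'No', 'possible_values': 'Solid, Striped'},
    ])
    df.to_excel(path, index=False)  # requires openpyxl, which this module already uses
    return path
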

class ProductTypeAttributesView(APIView):
    """
    API to view, create, update, and delete product type attributes and their possible values.
    Also supports dynamic product type creation.
    """

    def get(self, request):
        """
        Retrieve all product types with their attributes and possible values.
        """
        product_types = ProductType.objects.all()
        serializer = ProductTypeSerializer(product_types, many=True)
        # Transform the serialized data into the requested flat format
        result = []
        for pt in serializer.data:
            for attr in pt['attributes']:
                result.append({
                    'product_type': pt['name'],
                    'attribute_name': attr['name'],
                    'is_mandatory': 'Yes' if attr['is_mandatory'] else 'No',
                    'possible_values': ', '.join([pv['value'] for pv in attr['possible_values']])
                })
        return Response(result, status=status.HTTP_200_OK)

    def post(self, request):
        """
        Create a new product type or attribute with possible values.
        Expected payload example:
        {
            "product_type": "Hardware Screws",
            "attribute_name": "Material",
            "is_mandatory": "Yes",
            "possible_values": "Steel, Zinc Plated, Stainless Steel"
        }
        """
        try:
            product_type_name = request.data.get('product_type')
            attribute_name = request.data.get('attribute_name', '')
            # str() guards against clients sending a JSON boolean instead of "Yes"/"No".
            is_mandatory = str(request.data.get('is_mandatory', '')).strip().lower() in ['yes', 'true', '1']
            possible_values = request.data.get('possible_values', '')
            if not product_type_name:
                return Response({
                    "error": "product_type is required"
                }, status=status.HTTP_400_BAD_REQUEST)
            with transaction.atomic():
                # Get or create product type
                product_type, created = ProductType.objects.get_or_create(name=product_type_name)
                if created and not attribute_name:
                    return Response({
                        "message": f"Product type '{product_type_name}' created successfully",
                        "data": {"product_type": product_type_name}
                    }, status=status.HTTP_201_CREATED)
                if attribute_name:
                    # Create attribute
                    attribute, attr_created = ProductAttribute.objects.get_or_create(
                        product_type=product_type,
                        name=attribute_name,
                        defaults={'is_mandatory': is_mandatory}
                    )
                    if not attr_created:
                        return Response({
                            "error": f"Attribute '{attribute_name}' already exists for product type '{product_type_name}'"
                        }, status=status.HTTP_400_BAD_REQUEST)
                    # Handle possible values
                    if possible_values:
                        for val in [v.strip() for v in possible_values.split(',') if v.strip()]:
                            AttributePossibleValue.objects.create(attribute=attribute, value=val)
                    return Response({
                        "message": "Attribute created successfully",
                        "data": {
                            "product_type": product_type_name,
                            "attribute_name": attribute_name,
                            "is_mandatory": "Yes" if is_mandatory else "No",
                            "possible_values": possible_values
                        }
                    }, status=status.HTTP_201_CREATED)
                return Response({
                    "message": f"Product type '{product_type_name}' already exists",
                    "data": {"product_type": product_type_name}
                }, status=status.HTTP_200_OK)
        except Exception as e:
            return Response({"error": str(e)}, status=status.HTTP_500_INTERNAL_SERVER_ERROR)

    def put(self, request):
        """
        Update an existing product type attribute and its possible values.
        Expected payload example:
        {
            "product_type": "Hardware Screws",
            "attribute_name": "Material",
            "is_mandatory": "Yes",
            "possible_values": "Steel, Zinc Plated, Stainless Steel, Brass"
        }
        """
        try:
            product_type_name = request.data.get('product_type')
            attribute_name = request.data.get('attribute_name')
            is_mandatory = str(request.data.get('is_mandatory', '')).strip().lower() in ['yes', 'true', '1']
            possible_values = request.data.get('possible_values', '')
            if not all([product_type_name, attribute_name]):
                return Response({
                    "error": "product_type and attribute_name are required"
                }, status=status.HTTP_400_BAD_REQUEST)
            with transaction.atomic():
                try:
                    product_type = ProductType.objects.get(name=product_type_name)
                    attribute = ProductAttribute.objects.get(
                        product_type=product_type,
                        name=attribute_name
                    )
                except ProductType.DoesNotExist:
                    return Response({
                        "error": f"Product type '{product_type_name}' not found"
                    }, status=status.HTTP_404_NOT_FOUND)
                except ProductAttribute.DoesNotExist:
                    return Response({
                        "error": f"Attribute '{attribute_name}' not found for product type '{product_type_name}'"
                    }, status=status.HTTP_404_NOT_FOUND)
                # Update attribute
                attribute.is_mandatory = is_mandatory
                attribute.save()
                # Replace possible values
                AttributePossibleValue.objects.filter(attribute=attribute).delete()
                if possible_values:
                    for val in [v.strip() for v in possible_values.split(',') if v.strip()]:
                        AttributePossibleValue.objects.create(attribute=attribute, value=val)
                return Response({
                    "message": "Attribute updated successfully",
                    "data": {
                        "product_type": product_type_name,
                        "attribute_name": attribute_name,
                        "is_mandatory": "Yes" if is_mandatory else "No",
                        "possible_values": possible_values
                    }
                }, status=status.HTTP_200_OK)
        except Exception as e:
            return Response({"error": str(e)}, status=status.HTTP_500_INTERNAL_SERVER_ERROR)

    def delete(self, request):
        """
        Delete a product type or a specific attribute.
        Expected payload example:
        {
            "product_type": "Hardware Screws",
            "attribute_name": "Material"
        }
        """
        try:
            product_type_name = request.data.get('product_type')
            attribute_name = request.data.get('attribute_name', '')
            if not product_type_name:
                return Response({
                    "error": "product_type is required"
                }, status=status.HTTP_400_BAD_REQUEST)
            with transaction.atomic():
                try:
                    product_type = ProductType.objects.get(name=product_type_name)
                except ProductType.DoesNotExist:
                    return Response({
                        "error": f"Product type '{product_type_name}' not found"
                    }, status=status.HTTP_404_NOT_FOUND)
                if attribute_name:
                    # Delete a specific attribute
                    try:
                        attribute = ProductAttribute.objects.get(
                            product_type=product_type,
                            name=attribute_name
                        )
                        attribute.delete()
                        return Response({
                            "message": f"Attribute '{attribute_name}' deleted successfully from product type '{product_type_name}'"
                        }, status=status.HTTP_200_OK)
                    except ProductAttribute.DoesNotExist:
                        return Response({
                            "error": f"Attribute '{attribute_name}' not found for product type '{product_type_name}'"
                        }, status=status.HTTP_404_NOT_FOUND)
                else:
                    # Delete the entire product type
                    product_type.delete()
                    return Response({
                        "message": f"Product type '{product_type_name}' and all its attributes deleted successfully"
                    }, status=status.HTTP_200_OK)
        except Exception as e:
            return Response({"error": str(e)}, status=status.HTTP_500_INTERNAL_SERVER_ERROR)

class ProductTypeListView(APIView):
    """
    GET API to list all product types (names only).
    """

    def get(self, request):
        product_types = ProductType.objects.values_list('name', flat=True)
        return Response({"product_types": list(product_types)}, status=status.HTTP_200_OK)


class ProductAttributeValueView(APIView):
    """
    API to manage manually entered original attribute values.
    GET: Retrieve all attribute values for a product
    POST: Create or update attribute values for a product
    DELETE: Delete attribute values
    """

    def get(self, request):
        """
        Get original attribute values for a specific product or all products.
        Query params: item_id (optional)
        """
        item_id = request.query_params.get('item_id')
        if item_id:
            try:
                product = Product.objects.get(item_id=item_id)
                values = ProductAttributeValue.objects.filter(product=product)
                serializer = ProductAttributeValueSerializer(values, many=True)
                return Response({
                    "item_id": item_id,
                    "attributes": serializer.data
                }, status=status.HTTP_200_OK)
            except Product.DoesNotExist:
                return Response({
                    "error": f"Product with item_id '{item_id}' not found"
                }, status=status.HTTP_404_NOT_FOUND)
        else:
            # Return all attribute values grouped by product
            values = ProductAttributeValue.objects.all().select_related('product')
            serializer = ProductAttributeValueSerializer(values, many=True)
            return Response(serializer.data, status=status.HTTP_200_OK)

    def post(self, request):
        """
        Create or update an original attribute value for a product.
        Expected payload:
        {
            "item_id": "3217373735",
            "attribute_name": "Clothing Neck Style",
            "original_value": "V-Neck Square"
        }
        """
        serializer = ProductAttributeValueInputSerializer(data=request.data)
        if not serializer.is_valid():
            return Response({"error": serializer.errors}, status=status.HTTP_400_BAD_REQUEST)
        validated_data = serializer.validated_data
        item_id = validated_data['item_id']
        attribute_name = validated_data['attribute_name']
        original_value = validated_data['original_value']
        try:
            product = Product.objects.get(item_id=item_id)
        except Product.DoesNotExist:
            return Response({
                "error": f"Product with item_id '{item_id}' not found"
            }, status=status.HTTP_404_NOT_FOUND)
        # Create or update the attribute value
        attr_value, created = ProductAttributeValue.objects.update_or_create(
            product=product,
            attribute_name=attribute_name,
            defaults={'original_value': original_value}
        )
        response_serializer = ProductAttributeValueSerializer(attr_value)
        return Response({
            "message": "Attribute value created" if created else "Attribute value updated",
            "data": response_serializer.data
        }, status=status.HTTP_201_CREATED if created else status.HTTP_200_OK)

    def delete(self, request):
        """
        Delete original attribute value(s).
        Expected payload:
        {
            "item_id": "3217373735",
            "attribute_name": "Clothing Neck Style"  # Optional; if not provided, deletes all for the product
        }
        """
        item_id = request.data.get('item_id')
        attribute_name = request.data.get('attribute_name')
        if not item_id:
            return Response({
                "error": "item_id is required"
            }, status=status.HTTP_400_BAD_REQUEST)
        try:
            product = Product.objects.get(item_id=item_id)
        except Product.DoesNotExist:
            return Response({
                "error": f"Product with item_id '{item_id}' not found"
            }, status=status.HTTP_404_NOT_FOUND)
        if attribute_name:
            # Delete a specific attribute
            deleted_count, _ = ProductAttributeValue.objects.filter(
                product=product,
                attribute_name=attribute_name
            ).delete()
            if deleted_count == 0:
                return Response({
                    "error": f"Attribute '{attribute_name}' not found for product '{item_id}'"
                }, status=status.HTTP_404_NOT_FOUND)
            return Response({
                "message": f"Attribute '{attribute_name}' deleted successfully"
            }, status=status.HTTP_200_OK)
        else:
            # Delete all attributes for the product
            deleted_count, _ = ProductAttributeValue.objects.filter(product=product).delete()
            return Response({
                "message": f"Deleted {deleted_count} attribute(s) for product '{item_id}'"
            }, status=status.HTTP_200_OK)

class BulkProductAttributeValueView(APIView):
    """
    API for bulk operations on original attribute values.
    POST: Create/update multiple attribute values at once
    """

    def post(self, request):
        """
        Bulk create or update attribute values for multiple products.
        Expected payload:
        {
            "products": [
                {
                    "item_id": "3217373735",
                    "attributes": {
                        "Clothing Neck Style": "V-Neck Square",
                        "Condition": "New with tags"
                    }
                },
                {
                    "item_id": "1234567890",
                    "attributes": {
                        "Material": "Cotton",
                        "Size": "L"
                    }
                }
            ]
        }
        """
        products_data = request.data.get('products', [])
        if not products_data:
            return Response({
                "error": "products list is required"
            }, status=status.HTTP_400_BAD_REQUEST)
        results = []
        successful = 0
        failed = 0
        with transaction.atomic():
            for product_data in products_data:
                serializer = BulkProductAttributeValueSerializer(data=product_data)
                if not serializer.is_valid():
                    failed += 1
                    results.append({
                        "item_id": product_data.get('item_id'),
                        "status": "failed",
                        "error": serializer.errors
                    })
                    continue
                validated_data = serializer.validated_data
                item_id = validated_data['item_id']
                attributes = validated_data['attributes']
                try:
                    product = Product.objects.get(item_id=item_id)
                    created_count = 0
                    updated_count = 0
                    for attr_name, original_value in attributes.items():
                        _, created = ProductAttributeValue.objects.update_or_create(
                            product=product,
                            attribute_name=attr_name,
                            defaults={'original_value': original_value}
                        )
                        if created:
                            created_count += 1
                        else:
                            updated_count += 1
                    successful += 1
                    results.append({
                        "item_id": item_id,
                        "status": "success",
                        "created": created_count,
                        "updated": updated_count
                    })
                except Product.DoesNotExist:
                    failed += 1
                    results.append({
                        "item_id": item_id,
                        "status": "failed",
                        "error": f"Product with item_id '{item_id}' not found"
                    })
        return Response({
            "results": results,
            "total_products": len(products_data),
            "successful": successful,
            "failed": failed
        }, status=status.HTTP_200_OK)
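
# --- Usage sketch (added for illustration) ----------------------------------------------
# Posting the nested bulk payload expected by BulkProductAttributeValueView. The URL
# argument is hypothetical; the item_id comes from the sample data used in this module.
def _example_bulk_attribute_upload(url):
    import requests
    payload = {
        'products': [
            {'item_id': '3217373735',
             'attributes': {'Color': 'Sky Blue', 'Condition': 'New with tags'}},
        ]
    }
    return requests.post(url, json=payload).json()
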

class ProductListWithAttributesView(APIView):
    """
    GET API to list all products with their original attribute values.
    """

    def get(self, request):
        item_id = request.query_params.get('item_id')
        if item_id:
            try:
                product = Product.objects.get(item_id=item_id)
                serializer = ProductWithAttributesSerializer(product)
                return Response(serializer.data, status=status.HTTP_200_OK)
            except Product.DoesNotExist:
                return Response({
                    "error": f"Product with item_id '{item_id}' not found"
                }, status=status.HTTP_404_NOT_FOUND)
        else:
            products = Product.objects.all()
            serializer = ProductWithAttributesSerializer(products, many=True)
            return Response(serializer.data, status=status.HTTP_200_OK)

class CacheManagementView(APIView):
    """
    API endpoint to manage the caching system.
    GET: Get current cache statistics and configuration
    POST: Enable/disable caching or clear caches
    """

    def get(self, request):
        """
        Get the current cache configuration and statistics.
        """
        config = cache_config.get_cache_config()
        stats = ProductAttributeService.get_cache_stats()
        return Response({
            "configuration": config,
            "statistics": stats,
            "message": "Cache status retrieved successfully"
        }, status=status.HTTP_200_OK)

    def post(self, request):
        """
        Manage cache settings.
        Expected payload examples:

        1. Enable/disable caching:
        {
            "action": "toggle",
            "enable": true  // or false
        }

        2. Clear all caches:
        {
            "action": "clear"
        }

        3. Clear a specific cache:
        {
            "action": "clear",
            "cache_type": "embedding"  // or "attribute" or "clip"
        }

        4. Get statistics:
        {
            "action": "stats"
        }
        """
        action = request.data.get('action')
        if not action:
            return Response({
                "error": "action is required",
                "valid_actions": ["toggle", "clear", "stats"]
            }, status=status.HTTP_400_BAD_REQUEST)
        # Toggle caching on/off
        if action == "toggle":
            enable = request.data.get('enable')
            if enable is None:
                return Response({
                    "error": "enable parameter is required (true/false)"
                }, status=status.HTTP_400_BAD_REQUEST)
            # Update the cache configuration
            cache_config.ENABLE_CACHING = bool(enable)
            cache_config.ENABLE_ATTRIBUTE_EXTRACTION_CACHE = bool(enable)
            cache_config.ENABLE_EMBEDDING_CACHE = bool(enable)
            cache_config.ENABLE_CLIP_MODEL_CACHE = bool(enable)
            status_msg = "enabled" if enable else "disabled"
            return Response({
                "message": f"Caching has been {status_msg}",
                "configuration": cache_config.get_cache_config()
            }, status=status.HTTP_200_OK)
        # Clear caches
        elif action == "clear":
            cache_type = request.data.get('cache_type', 'all')
            if cache_type == 'all':
                ProductAttributeService.clear_all_caches()
                VisualProcessingService.clear_clip_cache()
                message = "All caches cleared successfully"
            elif cache_type == 'embedding':
                from .services import EmbeddingCache
                EmbeddingCache.clear()
                message = "Embedding cache cleared successfully"
            elif cache_type == 'attribute':
                from .services import SimpleCache
                SimpleCache.clear()
                message = "Attribute extraction cache cleared successfully"
            elif cache_type == 'clip':
                VisualProcessingService.clear_clip_cache()
                message = "CLIP model cache cleared successfully"
            else:
                return Response({
                    "error": f"Invalid cache_type: {cache_type}",
                    "valid_types": ["all", "embedding", "attribute", "clip"]
                }, status=status.HTTP_400_BAD_REQUEST)
            return Response({
                "message": message,
                "statistics": ProductAttributeService.get_cache_stats()
            }, status=status.HTTP_200_OK)
        # Get statistics
        elif action == "stats":
            stats = ProductAttributeService.get_cache_stats()
            config = cache_config.get_cache_config()
            return Response({
                "configuration": config,
                "statistics": stats
            }, status=status.HTTP_200_OK)
        else:
            return Response({
                "error": f"Invalid action: {action}",
                "valid_actions": ["toggle", "clear", "stats"]
            }, status=status.HTTP_400_BAD_REQUEST)
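
# --- Usage sketch (added for illustration) ----------------------------------------------
# Driving CacheManagementView with the payloads documented in its docstring. The URL
# argument is hypothetical; the real route lives in urls.py.
def _example_manage_cache(url):
    import requests
    requests.post(url, json={'action': 'toggle', 'enable': False})      # disable caching
    requests.post(url, json={'action': 'clear', 'cache_type': 'clip'})  # clear CLIP cache
    return requests.post(url, json={'action': 'stats'}).json()
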

class CacheStatsView(APIView):
    """
    Simple GET endpoint to retrieve cache statistics.
    """

    def get(self, request):
        """Get current cache statistics."""
        stats = ProductAttributeService.get_cache_stats()
        config = cache_config.get_cache_config()
        return Response({
            "cache_enabled": config["master_cache_enabled"],
            "statistics": stats,
            "timestamp": datetime.now().isoformat()
        }, status=status.HTTP_200_OK)

import os
import json

from django.http import JsonResponse
from django.conf import settings
from rest_framework.views import APIView


class ReadLocalJSONView(APIView):
    """
    API View to read a local JSON file and return its contents.
    """

    def post(self, request, *args, **kwargs):
        try:
            # Build the absolute path to the JSON file
            file_path = os.path.join(
                settings.BASE_DIR, 'attr_extraction', 'results', 'response35prod_additional_multiple.json'
            )
            # Check if the file exists
            if not os.path.exists(file_path):
                return JsonResponse({'error': 'JSON file not found'}, status=404)
            # Read and parse the JSON content
            with open(file_path, 'r', encoding='utf-8') as f:
                data = json.load(f)
            return JsonResponse(data, safe=False, status=200)
        except json.JSONDecodeError:
            return JsonResponse({'error': 'Invalid JSON format'}, status=500)
        except Exception as e:
            return JsonResponse({'error': str(e)}, status=500)