views.py 23 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574
  1. from rest_framework.views import APIView
  2. from rest_framework.response import Response
  3. from rest_framework import status
  4. from rest_framework.parsers import MultiPartParser, FormParser
  5. from django.db import transaction
  6. import pandas as pd
  7. from .models import Product, ProductType, ProductAttribute, AttributePossibleValue
  8. from .serializers import (
  9. SingleProductRequestSerializer,
  10. BatchProductRequestSerializer,
  11. ProductAttributeResultSerializer,
  12. BatchProductResponseSerializer,
  13. ProductSerializer,
  14. ProductTypeSerializer,
  15. ProductAttributeSerializer,
  16. AttributePossibleValueSerializer
  17. )
  18. from .services import ProductAttributeService
  19. from .ocr_service import OCRService
  20. class ExtractProductAttributesView(APIView):
  21. """
  22. API endpoint to extract product attributes for a single product by item_id.
  23. Fetches product details from database with source tracking.
  24. Returns attributes in array format: [{"value": "...", "source": "..."}]
  25. """
  26. def post(self, request):
  27. serializer = SingleProductRequestSerializer(data=request.data)
  28. if not serializer.is_valid():
  29. return Response({"error": serializer.errors}, status=status.HTTP_400_BAD_REQUEST)
  30. validated_data = serializer.validated_data
  31. item_id = validated_data.get("item_id")
  32. # Fetch product from DB
  33. try:
  34. product = Product.objects.get(item_id=item_id)
  35. except Product.DoesNotExist:
  36. return Response(
  37. {"error": f"Product with item_id '{item_id}' not found."},
  38. status=status.HTTP_404_NOT_FOUND
  39. )
  40. # Extract product details
  41. title = product.product_name
  42. short_desc = product.product_short_description
  43. long_desc = product.product_long_description
  44. image_url = product.image_path
  45. # Process image for OCR if required
  46. ocr_results = None
  47. ocr_text = None
  48. if validated_data.get("process_image", True) and image_url:
  49. ocr_service = OCRService()
  50. ocr_results = ocr_service.process_image(image_url)
  51. if ocr_results and ocr_results.get("detected_text"):
  52. ocr_attrs = ProductAttributeService.extract_attributes_from_ocr(
  53. ocr_results, validated_data.get("model")
  54. )
  55. ocr_results["extracted_attributes"] = ocr_attrs
  56. ocr_text = "\n".join([
  57. f"{item['text']} (confidence: {item['confidence']:.2f})"
  58. for item in ocr_results["detected_text"]
  59. ])
  60. # Combine all product text with source tracking
  61. product_text, source_map = ProductAttributeService.combine_product_text(
  62. title=title,
  63. short_desc=short_desc,
  64. long_desc=long_desc,
  65. ocr_text=ocr_text
  66. )
  67. # Extract attributes with enhanced features and source tracking
  68. result = ProductAttributeService.extract_attributes(
  69. product_text=product_text,
  70. mandatory_attrs=validated_data["mandatory_attrs"],
  71. source_map=source_map,
  72. model=validated_data.get("model"),
  73. extract_additional=validated_data.get("extract_additional", True),
  74. multiple=validated_data.get("multiple", []),
  75. threshold_abs=validated_data.get("threshold_abs", 0.65),
  76. margin=validated_data.get("margin", 0.15),
  77. use_dynamic_thresholds=validated_data.get("use_dynamic_thresholds", True),
  78. use_adaptive_margin=validated_data.get("use_adaptive_margin", True),
  79. use_semantic_clustering=validated_data.get("use_semantic_clustering", True)
  80. )
  81. # Attach OCR results if available
  82. if ocr_results:
  83. result["ocr_results"] = ocr_results
  84. response_serializer = ProductAttributeResultSerializer(data=result)
  85. if response_serializer.is_valid():
  86. return Response(response_serializer.data, status=status.HTTP_200_OK)
  87. return Response(result, status=status.HTTP_200_OK)
  88. class BatchExtractProductAttributesView(APIView):
  89. """
  90. API endpoint to extract product attributes for multiple products in batch.
  91. Uses item-specific mandatory_attrs with source tracking.
  92. Returns attributes in array format: [{"value": "...", "source": "..."}]
  93. """
  94. def post(self, request):
  95. serializer = BatchProductRequestSerializer(data=request.data)
  96. if not serializer.is_valid():
  97. return Response({"error": serializer.errors}, status=status.HTTP_400_BAD_REQUEST)
  98. validated_data = serializer.validated_data
  99. # Get batch-level settings
  100. product_list = validated_data.get("products", [])
  101. model = validated_data.get("model")
  102. extract_additional = validated_data.get("extract_additional", True)
  103. process_image = validated_data.get("process_image", True)
  104. multiple = validated_data.get("multiple", [])
  105. threshold_abs = validated_data.get("threshold_abs", 0.65)
  106. margin = validated_data.get("margin", 0.15)
  107. use_dynamic_thresholds = validated_data.get("use_dynamic_thresholds", True)
  108. use_adaptive_margin = validated_data.get("use_adaptive_margin", True)
  109. use_semantic_clustering = validated_data.get("use_semantic_clustering", True)
  110. # Extract all item_ids to query the database efficiently
  111. item_ids = [p['item_id'] for p in product_list]
  112. # Fetch all products in one query
  113. products_queryset = Product.objects.filter(item_id__in=item_ids)
  114. # Create a dictionary for easy lookup: item_id -> Product object
  115. product_map = {product.item_id: product for product in products_queryset}
  116. found_ids = set(product_map.keys())
  117. results = []
  118. successful = 0
  119. failed = 0
  120. for product_entry in product_list:
  121. item_id = product_entry['item_id']
  122. # Get item-specific mandatory attributes
  123. mandatory_attrs = product_entry['mandatory_attrs']
  124. if item_id not in found_ids:
  125. failed += 1
  126. results.append({
  127. "product_id": item_id,
  128. "error": "Product not found in database"
  129. })
  130. continue
  131. product = product_map[item_id]
  132. try:
  133. title = product.product_name
  134. short_desc = product.product_short_description
  135. long_desc = product.product_long_description
  136. image_url = product.image_path
  137. ocr_results = None
  138. ocr_text = None
  139. # Image Processing Logic
  140. if process_image and image_url:
  141. ocr_service = OCRService()
  142. ocr_results = ocr_service.process_image(image_url)
  143. if ocr_results and ocr_results.get("detected_text"):
  144. ocr_attrs = ProductAttributeService.extract_attributes_from_ocr(
  145. ocr_results, model
  146. )
  147. ocr_results["extracted_attributes"] = ocr_attrs
  148. ocr_text = "\n".join([
  149. f"{item['text']} (confidence: {item['confidence']:.2f})"
  150. for item in ocr_results["detected_text"]
  151. ])
  152. # Combine product text with source tracking
  153. product_text, source_map = ProductAttributeService.combine_product_text(
  154. title=title,
  155. short_desc=short_desc,
  156. long_desc=long_desc,
  157. ocr_text=ocr_text
  158. )
  159. # Attribute Extraction with source tracking (returns array format)
  160. extracted = ProductAttributeService.extract_attributes(
  161. product_text=product_text,
  162. mandatory_attrs=mandatory_attrs,
  163. source_map=source_map,
  164. model=model,
  165. extract_additional=extract_additional,
  166. multiple=multiple,
  167. threshold_abs=threshold_abs,
  168. margin=margin,
  169. use_dynamic_thresholds=use_dynamic_thresholds,
  170. use_adaptive_margin=use_adaptive_margin,
  171. use_semantic_clustering=use_semantic_clustering
  172. )
  173. result = {
  174. "product_id": product.item_id,
  175. "mandatory": extracted.get("mandatory", {}),
  176. "additional": extracted.get("additional", {}),
  177. }
  178. if ocr_results:
  179. result["ocr_results"] = ocr_results
  180. results.append(result)
  181. successful += 1
  182. except Exception as e:
  183. failed += 1
  184. results.append({
  185. "product_id": item_id,
  186. "error": str(e)
  187. })
  188. batch_result = {
  189. "results": results,
  190. "total_products": len(product_list),
  191. "successful": successful,
  192. "failed": failed
  193. }
  194. response_serializer = BatchProductResponseSerializer(data=batch_result)
  195. if response_serializer.is_valid():
  196. return Response(response_serializer.data, status=status.HTTP_200_OK)
  197. return Response(batch_result, status=status.HTTP_200_OK)
  198. class ProductListView(APIView):
  199. """
  200. GET API to list all products with details
  201. """
  202. def get(self, request):
  203. products = Product.objects.all()
  204. serializer = ProductSerializer(products, many=True)
  205. return Response(serializer.data, status=status.HTTP_200_OK)
  206. class ProductUploadExcelView(APIView):
  207. """
  208. POST API to upload an Excel file and add data to Product model (skip duplicates)
  209. """
  210. parser_classes = (MultiPartParser, FormParser)
  211. def post(self, request, *args, **kwargs):
  212. file_obj = request.FILES.get('file')
  213. if not file_obj:
  214. return Response({'error': 'No file provided'}, status=status.HTTP_400_BAD_REQUEST)
  215. try:
  216. df = pd.read_excel(file_obj)
  217. df.columns = [c.strip().lower().replace(' ', '_') for c in df.columns]
  218. expected_cols = {
  219. 'item_id',
  220. 'product_name',
  221. 'product_long_description',
  222. 'product_short_description',
  223. 'product_type',
  224. 'image_path'
  225. }
  226. if not expected_cols.issubset(df.columns):
  227. return Response({
  228. 'error': 'Missing required columns',
  229. 'required_columns': list(expected_cols)
  230. }, status=status.HTTP_400_BAD_REQUEST)
  231. created_count = 0
  232. skipped_count = 0
  233. for _, row in df.iterrows():
  234. item_id = row.get('item_id', '')
  235. # Check if this item already exists
  236. if Product.objects.filter(item_id=item_id).exists():
  237. skipped_count += 1
  238. continue
  239. Product.objects.create(
  240. item_id=item_id,
  241. product_name=row.get('product_name', ''),
  242. product_long_description=row.get('product_long_description', ''),
  243. product_short_description=row.get('product_short_description', ''),
  244. product_type=row.get('product_type', ''),
  245. image_path=row.get('image_path', ''),
  246. )
  247. created_count += 1
  248. return Response({
  249. 'message': f'Successfully uploaded {created_count} products.',
  250. 'skipped': f'Skipped {skipped_count} duplicates.'
  251. }, status=status.HTTP_201_CREATED)
  252. except Exception as e:
  253. return Response({'error': str(e)}, status=status.HTTP_500_INTERNAL_SERVER_ERROR)
  254. class ProductAttributesUploadView(APIView):
  255. """
  256. POST API to upload an Excel file and add mandatory/additional attributes
  257. for product types with possible values.
  258. """
  259. parser_classes = (MultiPartParser, FormParser)
  260. def post(self, request):
  261. file_obj = request.FILES.get('file')
  262. if not file_obj:
  263. return Response({"error": "No file provided."}, status=status.HTTP_400_BAD_REQUEST)
  264. try:
  265. df = pd.read_excel(file_obj)
  266. required_columns = {'product_type', 'attribute_name', 'is_mandatory', 'possible_values'}
  267. if not required_columns.issubset(df.columns):
  268. return Response({
  269. "error": f"Missing required columns. Found: {list(df.columns)}"
  270. }, status=status.HTTP_400_BAD_REQUEST)
  271. for _, row in df.iterrows():
  272. product_type_name = str(row['product_type']).strip()
  273. attr_name = str(row['attribute_name']).strip()
  274. is_mandatory = str(row['is_mandatory']).strip().lower() in ['yes', 'true', '1']
  275. possible_values = str(row.get('possible_values', '')).strip()
  276. # Get or create product type
  277. product_type, _ = ProductType.objects.get_or_create(name=product_type_name)
  278. # Get or create attribute
  279. attribute, _ = ProductAttribute.objects.get_or_create(
  280. product_type=product_type,
  281. name=attr_name,
  282. defaults={'is_mandatory': is_mandatory}
  283. )
  284. attribute.is_mandatory = is_mandatory
  285. attribute.save()
  286. # Handle possible values
  287. AttributePossibleValue.objects.filter(attribute=attribute).delete()
  288. if possible_values:
  289. for val in [v.strip() for v in possible_values.split(',') if v.strip()]:
  290. AttributePossibleValue.objects.create(attribute=attribute, value=val)
  291. return Response({"message": "Attributes uploaded successfully."}, status=status.HTTP_201_CREATED)
  292. except Exception as e:
  293. return Response({"error": str(e)}, status=status.HTTP_500_INTERNAL_SERVER_ERROR)
  294. class ProductTypeAttributesView(APIView):
  295. """
  296. API to view, create, update, and delete product type attributes and their possible values.
  297. Also supports dynamic product type creation.
  298. """
  299. def get(self, request):
  300. """
  301. Retrieve all product types with their attributes and possible values.
  302. """
  303. product_types = ProductType.objects.all()
  304. serializer = ProductTypeSerializer(product_types, many=True)
  305. # Transform the serialized data into the requested format
  306. result = []
  307. for pt in serializer.data:
  308. for attr in pt['attributes']:
  309. result.append({
  310. 'product_type': pt['name'],
  311. 'attribute_name': attr['name'],
  312. 'is_mandatory': 'Yes' if attr['is_mandatory'] else 'No',
  313. 'possible_values': ', '.join([pv['value'] for pv in attr['possible_values']])
  314. })
  315. return Response(result, status=status.HTTP_200_OK)
  316. def post(self, request):
  317. """
  318. Create a new product type or attribute with possible values.
  319. Expected payload example:
  320. {
  321. "product_type": "Hardware Screws",
  322. "attribute_name": "Material",
  323. "is_mandatory": "Yes",
  324. "possible_values": "Steel, Zinc Plated, Stainless Steel"
  325. }
  326. """
  327. try:
  328. product_type_name = request.data.get('product_type')
  329. attribute_name = request.data.get('attribute_name', '')
  330. is_mandatory = request.data.get('is_mandatory', '').lower() in ['yes', 'true', '1']
  331. possible_values = request.data.get('possible_values', '')
  332. if not product_type_name:
  333. return Response({
  334. "error": "product_type is required"
  335. }, status=status.HTTP_400_BAD_REQUEST)
  336. with transaction.atomic():
  337. # Get or create product type
  338. product_type, created = ProductType.objects.get_or_create(name=product_type_name)
  339. if created and not attribute_name:
  340. return Response({
  341. "message": f"Product type '{product_type_name}' created successfully",
  342. "data": {"product_type": product_type_name}
  343. }, status=status.HTTP_201_CREATED)
  344. if attribute_name:
  345. # Create attribute
  346. attribute, attr_created = ProductAttribute.objects.get_or_create(
  347. product_type=product_type,
  348. name=attribute_name,
  349. defaults={'is_mandatory': is_mandatory}
  350. )
  351. if not attr_created:
  352. return Response({
  353. "error": f"Attribute '{attribute_name}' already exists for product type '{product_type_name}'"
  354. }, status=status.HTTP_400_BAD_REQUEST)
  355. # Handle possible values
  356. if possible_values:
  357. for val in [v.strip() for v in possible_values.split(',') if v.strip()]:
  358. AttributePossibleValue.objects.create(attribute=attribute, value=val)
  359. return Response({
  360. "message": "Attribute created successfully",
  361. "data": {
  362. "product_type": product_type_name,
  363. "attribute_name": attribute_name,
  364. "is_mandatory": "Yes" if is_mandatory else "No",
  365. "possible_values": possible_values
  366. }
  367. }, status=status.HTTP_201_CREATED)
  368. return Response({
  369. "message": f"Product type '{product_type_name}' already exists",
  370. "data": {"product_type": product_type_name}
  371. }, status=status.HTTP_200_OK)
  372. except Exception as e:
  373. return Response({"error": str(e)}, status=status.HTTP_500_INTERNAL_SERVER_ERROR)
  374. def put(self, request):
  375. """
  376. Update an existing product type attribute and its possible values.
  377. Expected payload example:
  378. {
  379. "product_type": "Hardware Screws",
  380. "attribute_name": "Material",
  381. "is_mandatory": "Yes",
  382. "possible_values": "Steel, Zinc Plated, Stainless Steel, Brass"
  383. }
  384. """
  385. try:
  386. product_type_name = request.data.get('product_type')
  387. attribute_name = request.data.get('attribute_name')
  388. is_mandatory = request.data.get('is_mandatory', '').lower() in ['yes', 'true', '1']
  389. possible_values = request.data.get('possible_values', '')
  390. if not all([product_type_name, attribute_name]):
  391. return Response({
  392. "error": "product_type and attribute_name are required"
  393. }, status=status.HTTP_400_BAD_REQUEST)
  394. with transaction.atomic():
  395. try:
  396. product_type = ProductType.objects.get(name=product_type_name)
  397. attribute = ProductAttribute.objects.get(
  398. product_type=product_type,
  399. name=attribute_name
  400. )
  401. except ProductType.DoesNotExist:
  402. return Response({
  403. "error": f"Product type '{product_type_name}' not found"
  404. }, status=status.HTTP_404_NOT_FOUND)
  405. except ProductAttribute.DoesNotExist:
  406. return Response({
  407. "error": f"Attribute '{attribute_name}' not found for product type '{product_type_name}'"
  408. }, status=status.HTTP_404_NOT_FOUND)
  409. # Update attribute
  410. attribute.is_mandatory = is_mandatory
  411. attribute.save()
  412. # Update possible values
  413. AttributePossibleValue.objects.filter(attribute=attribute).delete()
  414. if possible_values:
  415. for val in [v.strip() for v in possible_values.split(',') if v.strip()]:
  416. AttributePossibleValue.objects.create(attribute=attribute, value=val)
  417. return Response({
  418. "message": "Attribute updated successfully",
  419. "data": {
  420. "product_type": product_type_name,
  421. "attribute_name": attribute_name,
  422. "is_mandatory": "Yes" if is_mandatory else "No",
  423. "possible_values": possible_values
  424. }
  425. }, status=status.HTTP_200_OK)
  426. except Exception as e:
  427. return Response({"error": str(e)}, status=status.HTTP_500_INTERNAL_SERVER_ERROR)
  428. def delete(self, request):
  429. """
  430. Delete a product type or a specific attribute.
  431. Expected payload example:
  432. {
  433. "product_type": "Hardware Screws",
  434. "attribute_name": "Material"
  435. }
  436. """
  437. try:
  438. product_type_name = request.data.get('product_type')
  439. attribute_name = request.data.get('attribute_name', '')
  440. if not product_type_name:
  441. return Response({
  442. "error": "product_type is required"
  443. }, status=status.HTTP_400_BAD_REQUEST)
  444. with transaction.atomic():
  445. try:
  446. product_type = ProductType.objects.get(name=product_type_name)
  447. except ProductType.DoesNotExist:
  448. return Response({
  449. "error": f"Product type '{product_type_name}' not found"
  450. }, status=status.HTTP_404_NOT_FOUND)
  451. if attribute_name:
  452. # Delete specific attribute
  453. try:
  454. attribute = ProductAttribute.objects.get(
  455. product_type=product_type,
  456. name=attribute_name
  457. )
  458. attribute.delete()
  459. return Response({
  460. "message": f"Attribute '{attribute_name}' deleted successfully from product type '{product_type_name}'"
  461. }, status=status.HTTP_200_OK)
  462. except ProductAttribute.DoesNotExist:
  463. return Response({
  464. "error": f"Attribute '{attribute_name}' not found for product type '{product_type_name}'"
  465. }, status=status.HTTP_404_NOT_FOUND)
  466. else:
  467. # Delete entire product type
  468. product_type.delete()
  469. return Response({
  470. "message": f"Product type '{product_type_name}' and all its attributes deleted successfully"
  471. }, status=status.HTTP_200_OK)
  472. except Exception as e:
  473. return Response({"error": str(e)}, status=status.HTTP_500_INTERNAL_SERVER_ERROR)
  474. class ProductTypeListView(APIView):
  475. """
  476. GET API to list all product types (only names).
  477. """
  478. def get(self, request):
  479. product_types = ProductType.objects.values_list('name', flat=True)
  480. return Response({"product_types": list(product_types)}, status=status.HTTP_200_OK)