|
@@ -235,71 +235,6 @@ from .services import ProductAttributeService
|
|
|
from .ocr_service import OCRService
|
|
from .ocr_service import OCRService
|
|
|
|
|
|
|
|
|
|
|
|
|
-# class ExtractProductAttributesView(APIView):
|
|
|
|
|
-# """
|
|
|
|
|
-# API endpoint to extract product attributes for a single product.
|
|
|
|
|
-# Now supports image URL for OCR-based text extraction.
|
|
|
|
|
-# """
|
|
|
|
|
-
|
|
|
|
|
-# def post(self, request):
|
|
|
|
|
-# serializer = SingleProductRequestSerializer(data=request.data)
|
|
|
|
|
-# if not serializer.is_valid():
|
|
|
|
|
-# return Response(
|
|
|
|
|
-# {"error": serializer.errors},
|
|
|
|
|
-# status=status.HTTP_400_BAD_REQUEST
|
|
|
|
|
-# )
|
|
|
|
|
-
|
|
|
|
|
-# validated_data = serializer.validated_data
|
|
|
|
|
-
|
|
|
|
|
-# # Process image if URL provided
|
|
|
|
|
-# ocr_results = None
|
|
|
|
|
-# ocr_text = None
|
|
|
|
|
-
|
|
|
|
|
-# if validated_data.get('process_image', True) and validated_data.get('image_url'):
|
|
|
|
|
-# ocr_service = OCRService()
|
|
|
|
|
-# ocr_results = ocr_service.process_image(validated_data['image_url'])
|
|
|
|
|
-
|
|
|
|
|
-# # Extract attributes from OCR
|
|
|
|
|
-# if ocr_results and ocr_results.get('detected_text'):
|
|
|
|
|
-# ocr_attrs = ProductAttributeService.extract_attributes_from_ocr(
|
|
|
|
|
-# ocr_results,
|
|
|
|
|
-# validated_data.get('model')
|
|
|
|
|
-# )
|
|
|
|
|
-# ocr_results['extracted_attributes'] = ocr_attrs
|
|
|
|
|
-
|
|
|
|
|
-# # Format OCR text
|
|
|
|
|
-# ocr_text = "\n".join([
|
|
|
|
|
-# f"{item['text']} (confidence: {item['confidence']:.2f})"
|
|
|
|
|
-# for item in ocr_results['detected_text']
|
|
|
|
|
-# ])
|
|
|
|
|
-
|
|
|
|
|
-# # Combine all product information
|
|
|
|
|
-# product_text = ProductAttributeService.combine_product_text(
|
|
|
|
|
-# title=validated_data.get('title'),
|
|
|
|
|
-# short_desc=validated_data.get('short_desc'),
|
|
|
|
|
-# long_desc=validated_data.get('long_desc'),
|
|
|
|
|
-# ocr_text=ocr_text
|
|
|
|
|
-# )
|
|
|
|
|
-
|
|
|
|
|
-# # Extract attributes
|
|
|
|
|
-# result = ProductAttributeService.extract_attributes(
|
|
|
|
|
-# product_text=product_text,
|
|
|
|
|
-# mandatory_attrs=validated_data['mandatory_attrs'],
|
|
|
|
|
-# model=validated_data.get('model'),
|
|
|
|
|
-# extract_additional=validated_data.get('extract_additional', True)
|
|
|
|
|
-# )
|
|
|
|
|
-
|
|
|
|
|
-# # Add OCR results if available
|
|
|
|
|
-# if ocr_results:
|
|
|
|
|
-# result['ocr_results'] = ocr_results
|
|
|
|
|
-
|
|
|
|
|
-# response_serializer = ProductAttributeResultSerializer(data=result)
|
|
|
|
|
-# if response_serializer.is_valid():
|
|
|
|
|
-# return Response(response_serializer.data, status=status.HTTP_200_OK)
|
|
|
|
|
-
|
|
|
|
|
-# return Response(result, status=status.HTTP_200_OK)
|
|
|
|
|
-
|
|
|
|
|
-
|
|
|
|
|
from .models import Product
|
|
from .models import Product
|
|
|
|
|
|
|
|
class ExtractProductAttributesView(APIView):
|
|
class ExtractProductAttributesView(APIView):
|
|
@@ -377,44 +312,118 @@ class ExtractProductAttributesView(APIView):
|
|
|
return Response(result, status=status.HTTP_200_OK)
|
|
return Response(result, status=status.HTTP_200_OK)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
+from .models import Product
|
|
|
|
|
+
|
|
|
# class BatchExtractProductAttributesView(APIView):
|
|
# class BatchExtractProductAttributesView(APIView):
|
|
|
# """
|
|
# """
|
|
|
-# API endpoint to extract product attributes for multiple products in batch.
|
|
|
|
|
-# Now supports image URLs for OCR-based text extraction.
|
|
|
|
|
|
|
+# API endpoint to extract product attributes for multiple products in batch by item_id.
|
|
|
|
|
+# Fetches all product details from database automatically.
|
|
|
# """
|
|
# """
|
|
|
|
|
|
|
|
# def post(self, request):
|
|
# def post(self, request):
|
|
|
# serializer = BatchProductRequestSerializer(data=request.data)
|
|
# serializer = BatchProductRequestSerializer(data=request.data)
|
|
|
# if not serializer.is_valid():
|
|
# if not serializer.is_valid():
|
|
|
-# return Response(
|
|
|
|
|
-# {"error": serializer.errors},
|
|
|
|
|
-# status=status.HTTP_400_BAD_REQUEST
|
|
|
|
|
-# )
|
|
|
|
|
|
|
+# return Response({"error": serializer.errors}, status=status.HTTP_400_BAD_REQUEST)
|
|
|
|
|
|
|
|
# validated_data = serializer.validated_data
|
|
# validated_data = serializer.validated_data
|
|
|
|
|
+# item_ids = validated_data.get("item_ids", [])
|
|
|
|
|
+# model = validated_data.get("model")
|
|
|
|
|
+# extract_additional = validated_data.get("extract_additional", True)
|
|
|
|
|
+# process_image = validated_data.get("process_image", True)
|
|
|
|
|
+# mandatory_attrs = validated_data["mandatory_attrs"]
|
|
|
|
|
+
|
|
|
|
|
+# # Fetch all products in one query
|
|
|
|
|
+# products = Product.objects.filter(item_id__in=item_ids)
|
|
|
|
|
+# found_ids = set(products.values_list("item_id", flat=True))
|
|
|
|
|
+# missing_ids = [pid for pid in item_ids if pid not in found_ids]
|
|
|
|
|
+
|
|
|
|
|
+# results = []
|
|
|
|
|
+# successful = 0
|
|
|
|
|
+# failed = 0
|
|
|
|
|
+
|
|
|
|
|
+# for product in products:
|
|
|
|
|
+# try:
|
|
|
|
|
+# title = product.product_name
|
|
|
|
|
+# short_desc = product.product_short_description
|
|
|
|
|
+# long_desc = product.product_long_description
|
|
|
|
|
+# image_url = product.image_path
|
|
|
|
|
+
|
|
|
|
|
+# ocr_results = None
|
|
|
|
|
+# ocr_text = None
|
|
|
|
|
+
|
|
|
|
|
+# if process_image and image_url:
|
|
|
|
|
+# ocr_service = OCRService()
|
|
|
|
|
+# ocr_results = ocr_service.process_image(image_url)
|
|
|
|
|
+
|
|
|
|
|
+# if ocr_results and ocr_results.get("detected_text"):
|
|
|
|
|
+# ocr_attrs = ProductAttributeService.extract_attributes_from_ocr(
|
|
|
|
|
+# ocr_results, model
|
|
|
|
|
+# )
|
|
|
|
|
+# ocr_results["extracted_attributes"] = ocr_attrs
|
|
|
|
|
+# ocr_text = "\n".join([
|
|
|
|
|
+# f"{item['text']} (confidence: {item['confidence']:.2f})"
|
|
|
|
|
+# for item in ocr_results["detected_text"]
|
|
|
|
|
+# ])
|
|
|
|
|
+
|
|
|
|
|
+# product_text = ProductAttributeService.combine_product_text(
|
|
|
|
|
+# title=title,
|
|
|
|
|
+# short_desc=short_desc,
|
|
|
|
|
+# long_desc=long_desc,
|
|
|
|
|
+# ocr_text=ocr_text
|
|
|
|
|
+# )
|
|
|
|
|
|
|
|
-# # Extract attributes for all products in batch
|
|
|
|
|
-# result = ProductAttributeService.extract_attributes_batch(
|
|
|
|
|
-# products=validated_data['products'],
|
|
|
|
|
-# mandatory_attrs=validated_data['mandatory_attrs'],
|
|
|
|
|
-# model=validated_data.get('model'),
|
|
|
|
|
-# extract_additional=validated_data.get('extract_additional', True),
|
|
|
|
|
-# process_image=validated_data.get('process_image', True)
|
|
|
|
|
-# )
|
|
|
|
|
|
|
+# extracted = ProductAttributeService.extract_attributes(
|
|
|
|
|
+# product_text=product_text,
|
|
|
|
|
+# mandatory_attrs=mandatory_attrs,
|
|
|
|
|
+# model=model,
|
|
|
|
|
+# extract_additional=extract_additional
|
|
|
|
|
+# )
|
|
|
|
|
|
|
|
-# response_serializer = BatchProductResponseSerializer(data=result)
|
|
|
|
|
|
|
+# result = {
|
|
|
|
|
+# "product_id": product.item_id,
|
|
|
|
|
+# "mandatory": extracted.get("mandatory", {}),
|
|
|
|
|
+# "additional": extracted.get("additional", {}),
|
|
|
|
|
+# }
|
|
|
|
|
+
|
|
|
|
|
+# if ocr_results:
|
|
|
|
|
+# result["ocr_results"] = ocr_results
|
|
|
|
|
+
|
|
|
|
|
+# results.append(result)
|
|
|
|
|
+# successful += 1
|
|
|
|
|
+
|
|
|
|
|
+# except Exception as e:
|
|
|
|
|
+# failed += 1
|
|
|
|
|
+# results.append({
|
|
|
|
|
+# "product_id": product.item_id,
|
|
|
|
|
+# "error": str(e)
|
|
|
|
|
+# })
|
|
|
|
|
+
|
|
|
|
|
+# # Add missing item_ids as failed entries
|
|
|
|
|
+# for mid in missing_ids:
|
|
|
|
|
+# failed += 1
|
|
|
|
|
+# results.append({
|
|
|
|
|
+# "product_id": mid,
|
|
|
|
|
+# "error": "Product not found in database"
|
|
|
|
|
+# })
|
|
|
|
|
+
|
|
|
|
|
+# batch_result = {
|
|
|
|
|
+# "results": results,
|
|
|
|
|
+# "total_products": len(item_ids),
|
|
|
|
|
+# "successful": successful,
|
|
|
|
|
+# "failed": failed
|
|
|
|
|
+# }
|
|
|
|
|
+
|
|
|
|
|
+# response_serializer = BatchProductResponseSerializer(data=batch_result)
|
|
|
# if response_serializer.is_valid():
|
|
# if response_serializer.is_valid():
|
|
|
# return Response(response_serializer.data, status=status.HTTP_200_OK)
|
|
# return Response(response_serializer.data, status=status.HTTP_200_OK)
|
|
|
-
|
|
|
|
|
-# return Response(result, status=status.HTTP_200_OK)
|
|
|
|
|
|
|
|
|
|
|
|
+# return Response(batch_result, status=status.HTTP_200_OK)
|
|
|
|
|
|
|
|
-from .models import Product
|
|
|
|
|
|
|
|
|
|
class BatchExtractProductAttributesView(APIView):
|
|
class BatchExtractProductAttributesView(APIView):
|
|
|
"""
|
|
"""
|
|
|
- API endpoint to extract product attributes for multiple products in batch by item_id.
|
|
|
|
|
- Fetches all product details from database automatically.
|
|
|
|
|
|
|
+ API endpoint to extract product attributes for multiple products in batch.
|
|
|
|
|
+ Uses item-specific mandatory_attrs.
|
|
|
"""
|
|
"""
|
|
|
|
|
|
|
|
def post(self, request):
|
|
def post(self, request):
|
|
@@ -423,22 +432,42 @@ class BatchExtractProductAttributesView(APIView):
|
|
|
return Response({"error": serializer.errors}, status=status.HTTP_400_BAD_REQUEST)
|
|
return Response({"error": serializer.errors}, status=status.HTTP_400_BAD_REQUEST)
|
|
|
|
|
|
|
|
validated_data = serializer.validated_data
|
|
validated_data = serializer.validated_data
|
|
|
- item_ids = validated_data.get("item_ids", [])
|
|
|
|
|
|
|
+
|
|
|
|
|
+ # Get batch-level settings
|
|
|
|
|
+ product_list = validated_data.get("products", []) # New: list of {item_id, mandatory_attrs}
|
|
|
model = validated_data.get("model")
|
|
model = validated_data.get("model")
|
|
|
extract_additional = validated_data.get("extract_additional", True)
|
|
extract_additional = validated_data.get("extract_additional", True)
|
|
|
process_image = validated_data.get("process_image", True)
|
|
process_image = validated_data.get("process_image", True)
|
|
|
- mandatory_attrs = validated_data["mandatory_attrs"]
|
|
|
|
|
-
|
|
|
|
|
|
|
+
|
|
|
|
|
+ # Extract all item_ids to query the database efficiently
|
|
|
|
|
+ item_ids = [p['item_id'] for p in product_list]
|
|
|
|
|
+
|
|
|
# Fetch all products in one query
|
|
# Fetch all products in one query
|
|
|
- products = Product.objects.filter(item_id__in=item_ids)
|
|
|
|
|
- found_ids = set(products.values_list("item_id", flat=True))
|
|
|
|
|
- missing_ids = [pid for pid in item_ids if pid not in found_ids]
|
|
|
|
|
-
|
|
|
|
|
|
|
+ products_queryset = Product.objects.filter(item_id__in=item_ids)
|
|
|
|
|
+
|
|
|
|
|
+ # Create a dictionary for easy lookup: item_id -> Product object
|
|
|
|
|
+ product_map = {product.item_id: product for product in products_queryset}
|
|
|
|
|
+ found_ids = set(product_map.keys())
|
|
|
|
|
+
|
|
|
results = []
|
|
results = []
|
|
|
successful = 0
|
|
successful = 0
|
|
|
failed = 0
|
|
failed = 0
|
|
|
|
|
|
|
|
- for product in products:
|
|
|
|
|
|
|
+ for product_entry in product_list:
|
|
|
|
|
+ item_id = product_entry['item_id']
|
|
|
|
|
+ # Get item-specific mandatory attributes
|
|
|
|
|
+ mandatory_attrs = product_entry['mandatory_attrs']
|
|
|
|
|
+
|
|
|
|
|
+ if item_id not in found_ids:
|
|
|
|
|
+ failed += 1
|
|
|
|
|
+ results.append({
|
|
|
|
|
+ "product_id": item_id,
|
|
|
|
|
+ "error": "Product not found in database"
|
|
|
|
|
+ })
|
|
|
|
|
+ continue # Skip to the next product
|
|
|
|
|
+
|
|
|
|
|
+ product = product_map[item_id]
|
|
|
|
|
+
|
|
|
try:
|
|
try:
|
|
|
title = product.product_name
|
|
title = product.product_name
|
|
|
short_desc = product.product_short_description
|
|
short_desc = product.product_short_description
|
|
@@ -448,11 +477,14 @@ class BatchExtractProductAttributesView(APIView):
|
|
|
ocr_results = None
|
|
ocr_results = None
|
|
|
ocr_text = None
|
|
ocr_text = None
|
|
|
|
|
|
|
|
|
|
+ # Image Processing Logic (same as before)
|
|
|
if process_image and image_url:
|
|
if process_image and image_url:
|
|
|
ocr_service = OCRService()
|
|
ocr_service = OCRService()
|
|
|
ocr_results = ocr_service.process_image(image_url)
|
|
ocr_results = ocr_service.process_image(image_url)
|
|
|
|
|
|
|
|
if ocr_results and ocr_results.get("detected_text"):
|
|
if ocr_results and ocr_results.get("detected_text"):
|
|
|
|
|
+ # Ensure the services are designed to handle 'mandatory_attrs'
|
|
|
|
|
+ # for attribute extraction from OCR text
|
|
|
ocr_attrs = ProductAttributeService.extract_attributes_from_ocr(
|
|
ocr_attrs = ProductAttributeService.extract_attributes_from_ocr(
|
|
|
ocr_results, model
|
|
ocr_results, model
|
|
|
)
|
|
)
|
|
@@ -469,9 +501,10 @@ class BatchExtractProductAttributesView(APIView):
|
|
|
ocr_text=ocr_text
|
|
ocr_text=ocr_text
|
|
|
)
|
|
)
|
|
|
|
|
|
|
|
|
|
+ # Attribute Extraction Logic - NOW USING ITEM-SPECIFIC mandatory_attrs
|
|
|
extracted = ProductAttributeService.extract_attributes(
|
|
extracted = ProductAttributeService.extract_attributes(
|
|
|
product_text=product_text,
|
|
product_text=product_text,
|
|
|
- mandatory_attrs=mandatory_attrs,
|
|
|
|
|
|
|
+ mandatory_attrs=mandatory_attrs, # <--- Changed: now item-specific
|
|
|
model=model,
|
|
model=model,
|
|
|
extract_additional=extract_additional
|
|
extract_additional=extract_additional
|
|
|
)
|
|
)
|
|
@@ -491,21 +524,17 @@ class BatchExtractProductAttributesView(APIView):
|
|
|
except Exception as e:
|
|
except Exception as e:
|
|
|
failed += 1
|
|
failed += 1
|
|
|
results.append({
|
|
results.append({
|
|
|
- "product_id": product.item_id,
|
|
|
|
|
|
|
+ "product_id": item_id,
|
|
|
"error": str(e)
|
|
"error": str(e)
|
|
|
})
|
|
})
|
|
|
|
|
|
|
|
- # Add missing item_ids as failed entries
|
|
|
|
|
- for mid in missing_ids:
|
|
|
|
|
- failed += 1
|
|
|
|
|
- results.append({
|
|
|
|
|
- "product_id": mid,
|
|
|
|
|
- "error": "Product not found in database"
|
|
|
|
|
- })
|
|
|
|
|
|
|
+ # No need for a separate missing_ids loop since we handle it when iterating over product_list
|
|
|
|
|
+ # The list comprehension `item_ids = [p['item_id'] for p in product_list]` and the check
|
|
|
|
|
+ # `if item_id not in found_ids:` now correctly handle missing products from the input list.
|
|
|
|
|
|
|
|
batch_result = {
|
|
batch_result = {
|
|
|
"results": results,
|
|
"results": results,
|
|
|
- "total_products": len(item_ids),
|
|
|
|
|
|
|
+ "total_products": len(product_list),
|
|
|
"successful": successful,
|
|
"successful": successful,
|
|
|
"failed": failed
|
|
"failed": failed
|
|
|
}
|
|
}
|
|
@@ -536,8 +565,6 @@ class ProductListView(APIView):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
-
|
|
|
|
|
-
|
|
|
|
|
import pandas as pd
|
|
import pandas as pd
|
|
|
from rest_framework.parsers import MultiPartParser, FormParser
|
|
from rest_framework.parsers import MultiPartParser, FormParser
|
|
|
from rest_framework.views import APIView
|
|
from rest_framework.views import APIView
|
|
@@ -547,62 +574,6 @@ from .models import Product
|
|
|
from .serializers import ProductSerializer
|
|
from .serializers import ProductSerializer
|
|
|
|
|
|
|
|
|
|
|
|
|
-# class ProductUploadExcelView(APIView):
|
|
|
|
|
-# """
|
|
|
|
|
-# POST API to upload an Excel file and add data to Product model
|
|
|
|
|
-# """
|
|
|
|
|
-# parser_classes = (MultiPartParser, FormParser)
|
|
|
|
|
-
|
|
|
|
|
-# def post(self, request, *args, **kwargs):
|
|
|
|
|
-# file_obj = request.FILES.get('file')
|
|
|
|
|
-# if not file_obj:
|
|
|
|
|
-# return Response({'error': 'No file provided'}, status=status.HTTP_400_BAD_REQUEST)
|
|
|
|
|
-
|
|
|
|
|
-# try:
|
|
|
|
|
-# # Read the Excel file
|
|
|
|
|
-# df = pd.read_excel(file_obj)
|
|
|
|
|
-
|
|
|
|
|
-# # Normalize column names
|
|
|
|
|
-# df.columns = [c.strip().lower().replace(' ', '_') for c in df.columns]
|
|
|
|
|
-
|
|
|
|
|
-# # Expected columns
|
|
|
|
|
-# expected_cols = {
|
|
|
|
|
-# 'item_id',
|
|
|
|
|
-# 'product_name',
|
|
|
|
|
-# 'product_long_description',
|
|
|
|
|
-# 'product_short_description',
|
|
|
|
|
-# 'product_type',
|
|
|
|
|
-# 'image_path'
|
|
|
|
|
-# }
|
|
|
|
|
-
|
|
|
|
|
-# if not expected_cols.issubset(df.columns):
|
|
|
|
|
-# return Response({
|
|
|
|
|
-# 'error': 'Missing required columns',
|
|
|
|
|
-# 'required_columns': list(expected_cols)
|
|
|
|
|
-# }, status=status.HTTP_400_BAD_REQUEST)
|
|
|
|
|
-
|
|
|
|
|
-# # Loop through rows and create Product entries
|
|
|
|
|
-# created_count = 0
|
|
|
|
|
-# for _, row in df.iterrows():
|
|
|
|
|
-# Product.objects.create(
|
|
|
|
|
-# item_id=row.get('item_id', ''),
|
|
|
|
|
-# product_name=row.get('product_name', ''),
|
|
|
|
|
-# product_long_description=row.get('product_long_description', ''),
|
|
|
|
|
-# product_short_description=row.get('product_short_description', ''),
|
|
|
|
|
-# product_type=row.get('product_type', ''),
|
|
|
|
|
-# image_path=row.get('image_path', ''),
|
|
|
|
|
-# )
|
|
|
|
|
-# created_count += 1
|
|
|
|
|
-
|
|
|
|
|
-# return Response({
|
|
|
|
|
-# 'message': f'Successfully uploaded {created_count} products.'
|
|
|
|
|
-# }, status=status.HTTP_201_CREATED)
|
|
|
|
|
-
|
|
|
|
|
-# except Exception as e:
|
|
|
|
|
-# return Response({'error': str(e)}, status=status.HTTP_500_INTERNAL_SERVER_ERROR)
|
|
|
|
|
-
|
|
|
|
|
-
|
|
|
|
|
-
|
|
|
|
|
class ProductUploadExcelView(APIView):
|
|
class ProductUploadExcelView(APIView):
|
|
|
"""
|
|
"""
|
|
|
POST API to upload an Excel file and add data to Product model (skip duplicates)
|
|
POST API to upload an Excel file and add data to Product model (skip duplicates)
|
|
@@ -665,11 +636,6 @@ class ProductUploadExcelView(APIView):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
-
|
|
|
|
|
-
|
|
|
|
|
-
|
|
|
|
|
-
|
|
|
|
|
-
|
|
|
|
|
import pandas as pd
|
|
import pandas as pd
|
|
|
from rest_framework.views import APIView
|
|
from rest_framework.views import APIView
|
|
|
from rest_framework.response import Response
|
|
from rest_framework.response import Response
|
|
@@ -678,6 +644,7 @@ from rest_framework.parsers import MultiPartParser, FormParser
|
|
|
from .models import ProductType, ProductAttribute, AttributePossibleValue
|
|
from .models import ProductType, ProductAttribute, AttributePossibleValue
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
+
|
|
|
class ProductAttributesUploadView(APIView):
|
|
class ProductAttributesUploadView(APIView):
|
|
|
"""
|
|
"""
|
|
|
POST API to upload an Excel file and add mandatory/additional attributes
|
|
POST API to upload an Excel file and add mandatory/additional attributes
|