# ==================== views.py ====================
import logging

import pandas as pd  # noqa: F401  (used by the Excel upload view in this module)
from rest_framework import status
from rest_framework.parsers import FormParser, MultiPartParser
from rest_framework.response import Response
from rest_framework.views import APIView

from .models import Product
from .ocr_service import OCRService
from .serializers import (
    BatchProductRequestSerializer,
    BatchProductResponseSerializer,
    ProductAttributeResultSerializer,
    ProductSerializer,
    SingleProductRequestSerializer,
)
from .services import ProductAttributeService

logger = logging.getLogger(__name__)


def _run_ocr(image_url, model):
    """Run OCR on ``image_url`` and return ``(ocr_results, ocr_text)``.

    When the OCR result contains a non-empty ``detected_text`` entry, the
    attributes extracted from it are stored under
    ``ocr_results["extracted_attributes"]`` and ``ocr_text`` is one line per
    detected item in the form ``"<text> (confidence: <0.00>)"``.
    Otherwise ``ocr_text`` is ``None`` and ``ocr_results`` is returned as-is.
    """
    # NOTE(review): a new OCRService is built per image, exactly as before.
    # If its constructor is expensive, consider sharing one instance.
    ocr_results = OCRService().process_image(image_url)
    if not (ocr_results and ocr_results.get("detected_text")):
        return ocr_results, None

    ocr_results["extracted_attributes"] = (
        ProductAttributeService.extract_attributes_from_ocr(ocr_results, model)
    )
    ocr_text = "\n".join(
        f"{item['text']} (confidence: {item['confidence']:.2f})"
        for item in ocr_results["detected_text"]
    )
    return ocr_results, ocr_text


def _extract_product_attributes(
    product, *, mandatory_attrs, model, extract_additional, process_image
):
    """Run the full extraction pipeline for one ``Product`` row.

    Optionally OCRs the product image, combines the product name, both
    descriptions and the OCR text, then asks ``ProductAttributeService`` to
    extract the attributes.

    Returns:
        ``(extracted, ocr_results)`` where ``extracted`` is whatever
        ``ProductAttributeService.extract_attributes`` returned and
        ``ocr_results`` is ``None`` when no OCR was performed.
    """
    ocr_results = None
    ocr_text = None
    if process_image and product.image_path:
        ocr_results, ocr_text = _run_ocr(product.image_path, model)

    product_text = ProductAttributeService.combine_product_text(
        title=product.product_name,
        short_desc=product.product_short_description,
        long_desc=product.product_long_description,
        ocr_text=ocr_text,
    )
    extracted = ProductAttributeService.extract_attributes(
        product_text=product_text,
        mandatory_attrs=mandatory_attrs,
        model=model,
        extract_additional=extract_additional,
    )
    return extracted, ocr_results


class ExtractProductAttributesView(APIView):
    """
    API endpoint to extract product attributes for a single product by item_id.
    Fetches product details from database.

    Validated request fields read by this view: ``item_id``,
    ``mandatory_attrs``, ``model`` (optional), ``extract_additional``
    (optional, default ``True``) and ``process_image`` (optional, default
    ``True``).

    Responses:
        400 - request failed ``SingleProductRequestSerializer`` validation.
        404 - no product with the given ``item_id``.
        200 - extraction result, plus ``ocr_results`` when OCR produced any.
    """

    def post(self, request):
        serializer = SingleProductRequestSerializer(data=request.data)
        if not serializer.is_valid():
            return Response(
                {"error": serializer.errors},
                status=status.HTTP_400_BAD_REQUEST,
            )

        validated_data = serializer.validated_data
        item_id = validated_data.get("item_id")

        # filter().first() instead of get(): if several rows share an item_id,
        # get() would raise MultipleObjectsReturned and surface as a 500.
        product = Product.objects.filter(item_id=item_id).first()
        if product is None:
            return Response(
                {"error": f"Product with item_id '{item_id}' not found."},
                status=status.HTTP_404_NOT_FOUND,
            )

        result, ocr_results = _extract_product_attributes(
            product,
            mandatory_attrs=validated_data["mandatory_attrs"],
            model=validated_data.get("model"),
            extract_additional=validated_data.get("extract_additional", True),
            process_image=validated_data.get("process_image", True),
        )

        # Attach OCR results if available
        if ocr_results:
            result["ocr_results"] = ocr_results

        # Prefer the serializer's representation; fall back to the raw result
        # when it does not validate, so the caller still gets the data.
        response_serializer = ProductAttributeResultSerializer(data=result)
        if response_serializer.is_valid():
            return Response(response_serializer.data, status=status.HTTP_200_OK)
        return Response(result, status=status.HTTP_200_OK)


class BatchExtractProductAttributesView(APIView):
    """
    API endpoint to extract product attributes for multiple products in batch
    by item_id. Fetches all product details from database automatically.

    Validated request fields read by this view: ``item_ids``,
    ``mandatory_attrs``, ``model`` (optional), ``extract_additional``
    (optional, default ``True``) and ``process_image`` (optional, default
    ``True``).

    The response body contains ``results`` (one entry per product, holding
    either ``mandatory``/``additional`` or an ``error``), ``total_products``,
    ``successful`` and ``failed``. Repeated ids in the request are processed
    once, so ``total_products`` counts distinct requested ids.
    """

    def post(self, request):
        serializer = BatchProductRequestSerializer(data=request.data)
        if not serializer.is_valid():
            return Response(
                {"error": serializer.errors},
                status=status.HTTP_400_BAD_REQUEST,
            )

        validated_data = serializer.validated_data
        # De-duplicate while keeping request order so that the counters add up
        # and a missing id is reported only once.
        item_ids = list(dict.fromkeys(validated_data.get("item_ids", [])))
        model = validated_data.get("model")
        extract_additional = validated_data.get("extract_additional", True)
        process_image = validated_data.get("process_image", True)
        mandatory_attrs = validated_data["mandatory_attrs"]

        # Fetch all products in one query (evaluated once and reused below).
        products = list(Product.objects.filter(item_id__in=item_ids))
        found_ids = {product.item_id for product in products}
        missing_ids = [pid for pid in item_ids if pid not in found_ids]

        results = []
        successful = 0
        failed = 0

        for product in products:
            # Best-effort boundary: one failing product must not abort the batch.
            try:
                extracted, ocr_results = _extract_product_attributes(
                    product,
                    mandatory_attrs=mandatory_attrs,
                    model=model,
                    extract_additional=extract_additional,
                    process_image=process_image,
                )
                result = {
                    "product_id": product.item_id,
                    "mandatory": extracted.get("mandatory", {}),
                    "additional": extracted.get("additional", {}),
                }
                if ocr_results:
                    result["ocr_results"] = ocr_results
            except Exception as e:
                logger.exception(
                    "Attribute extraction failed for item_id %s", product.item_id
                )
                failed += 1
                results.append({"product_id": product.item_id, "error": str(e)})
            else:
                results.append(result)
                successful += 1

        # Add missing item_ids as failed entries
        for mid in missing_ids:
            failed += 1
            results.append({
                "product_id": mid,
                "error": "Product not found in database",
            })

        batch_result = {
            "results": results,
            "total_products": len(item_ids),
            "successful": successful,
            "failed": failed,
        }

        response_serializer = BatchProductResponseSerializer(data=batch_result)
        if response_serializer.is_valid():
            return Response(response_serializer.data, status=status.HTTP_200_OK)
        return Response(batch_result, status=status.HTTP_200_OK)


class ProductListView(APIView):
    """
    GET API to list all products with details
    """

    def get(self, request):
        products = Product.objects.all()
        serializer = ProductSerializer(products, many=True)
        return Response(serializer.data, status=status.HTTP_200_OK)
class ProductUploadExcelView(APIView):
    """
    POST API to upload an Excel file and add data to Product model (skip duplicates)

    Expects a multipart/form upload with the spreadsheet under the ``file``
    key. Column headers are normalised (stringified, stripped, lower-cased,
    spaces replaced by underscores) before being matched against
    ``REQUIRED_COLUMNS``.

    Responses:
        400 - no file supplied, or required columns are missing.
        201 - rows processed; the body reports created, skipped (an item with
              the same ``item_id`` already exists) and invalid (blank
              ``item_id``) row counts.
        500 - any other error while reading or storing the sheet.
    """
    parser_classes = (MultiPartParser, FormParser)

    # Columns every uploaded sheet must provide; each one maps 1:1 onto the
    # Product field of the same name.
    REQUIRED_COLUMNS = (
        'item_id',
        'product_name',
        'product_long_description',
        'product_short_description',
        'product_type',
        'image_path',
    )

    def post(self, request, *args, **kwargs):
        file_obj = request.FILES.get('file')
        if not file_obj:
            return Response({'error': 'No file provided'}, status=status.HTTP_400_BAD_REQUEST)

        try:
            import pandas as pd

            df = pd.read_excel(file_obj)
            # str(): header cells may be numbers or dates, which have no .strip().
            df.columns = [str(c).strip().lower().replace(' ', '_') for c in df.columns]

            missing_cols = [c for c in self.REQUIRED_COLUMNS if c not in df.columns]
            if missing_cols:
                return Response({
                    'error': 'Missing required columns',
                    'required_columns': list(self.REQUIRED_COLUMNS),
                    'missing_columns': missing_cols,
                }, status=status.HTTP_400_BAD_REQUEST)

            created_count = 0
            skipped_count = 0
            invalid_count = 0

            for _, row in df.iterrows():
                # Empty cells are read as NaN; store them as empty strings
                # instead of writing NaN into the model.
                record = {
                    col: ('' if pd.isna(row[col]) else row[col])
                    for col in self.REQUIRED_COLUMNS
                }

                item_id = record['item_id']
                if isinstance(item_id, str) and not item_id.strip():
                    # A row without an identifier cannot be looked up later.
                    invalid_count += 1
                    continue

                # Check if this item already exists. The check is done per row
                # in the database so that earlier rows of this same file and
                # the database's own type coercion are both honoured.
                if Product.objects.filter(item_id=item_id).exists():
                    skipped_count += 1
                    continue

                Product.objects.create(**record)
                created_count += 1

            return Response({
                'message': f'Successfully uploaded {created_count} products.',
                'skipped': f'Skipped {skipped_count} duplicates.',
                'invalid': f'Ignored {invalid_count} rows without an item_id.',
            }, status=status.HTTP_201_CREATED)

        except Exception as e:
            # Top-level boundary: report the failure as JSON rather than
            # letting it escape the view.
            return Response({'error': str(e)}, status=status.HTTP_500_INTERNAL_SERVER_ERROR)