---
# nocaps.yaml — evaluation config for image captioning on the nocaps dataset.

# Dataset locations.
image_root: '/export/share/datasets/vision/nocaps/'
ann_root: 'annotation'

# set pretrained as a file path or an url
pretrained: 'https://storage.googleapis.com/sfr-vision-language-research/BLIP/models/model_base_caption_capfilt_large.pth'

# Model / input settings.
vit: 'base'
batch_size: 32
image_size: 384

# Caption generation (beam search) settings.
max_length: 20
min_length: 5
num_beams: 3
prompt: 'a picture of '