image_root: '/export/share/datasets/vision/nocaps/'
ann_root: 'annotation'

# set pretrained to a local file path or a URL
pretrained: 'https://storage.googleapis.com/sfr-vision-language-research/BLIP/models/model_base_caption_capfilt_large.pth'
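
# model and input settings: 'base' selects the ViT-B image encoder;
# images are resized to image_size x image_size for evaluation
# (batch_size is assumed to be the per-device batch size)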
vit: 'base'
batch_size: 32
image_size: 384
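
# caption generation settings: beam search with num_beams beams,
# generated captions constrained to [min_length, max_length] tokens,
# each conditioned on the text prompt below (prepended during decoding)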
max_length: 20
min_length: 5
num_beams: 3
prompt: 'a picture of '