---
# Settings block: dataset paths, pretrained checkpoint, ViT/batch options and
# optimizer values. Scalars use canonical forms (lowercase booleans, floats
# with an explicit mantissa dot) so that YAML 1.1 and YAML 1.2 loaders resolve
# them to the same types.
- image_root: '/export/share/datasets/vision/flickr30k/'
- ann_root: 'annotation'
- dataset: 'flickr'
- # set pretrained as a file path or an url
- pretrained: 'https://storage.googleapis.com/sfr-vision-language-research/BLIP/models/model_base_retrieval_flickr.pth'
- # size of vit model; base or large
- vit: 'base'
- batch_size_train: 32
- batch_size_test: 64
- vit_grad_ckpt: true
- vit_ckpt_layer: 4
# Written as 1.0e-5, not 1e-5: YAML 1.1 loaders (e.g. PyYAML) only resolve a
# float when the mantissa contains a dot; without it the value loads as a string.
- init_lr: 1.0e-5
# The commented-out entries below appear to be the alternative values for
# vit: 'large' (NOTE(review): confirm before enabling; swap them with the
# 'base' entries above rather than enabling both).
- # vit: 'large'
- # batch_size_train: 16
- # batch_size_test: 32
- # vit_grad_ckpt: true
- # vit_ckpt_layer: 10
- # init_lr: 5.0e-6
- image_size: 384
- queue_size: 57600
- alpha: 0.4
- k_test: 128
- negative_all_rank: false
- # optimizer
- weight_decay: 0.05
- min_lr: 0
- max_epoch: 6
...