vqa_root: '/export/share/datasets/vision/VQA/Images/mscoco/' # followed by train2014/
vg_root: '/export/share/datasets/vision/visual-genome/' # followed by image/
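# annotation splits used for training; each name is assumed to refer to a
# matching .json file under ann_root (e.g. vqa_train.json)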
train_files: ['vqa_train','vqa_val','vg_qa']
ann_root: 'annotation'
# set pretrained as a file path or a URL
pretrained: 'https://storage.googleapis.com/sfr-vision-language-research/BLIP/models/model_base_vqa_capfilt_large.pth'
# size of the ViT backbone; 'base' or 'large'
vit: 'base'
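# batch sizes; assumed to be per device when launched with distributed training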
batch_size_train: 16
batch_size_test: 32
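# gradient checkpointing in the ViT trades compute for memory; vit_ckpt_layer
# is assumed to select how many of the final blocks are checkpointed
# (unused while vit_grad_ckpt is False)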
vit_grad_ckpt: False
vit_ckpt_layer: 0
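# initial learning rate, assumed to be decayed toward min_lr below
# (BLIP uses a cosine schedule)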
init_lr: 2e-5
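# input resolution; images are assumed to be resized to image_size x image_size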
image_size: 480
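# answer decoding: 'rank' scores the k_test most likely candidate answers and
# picks the best; 'generate' is assumed to decode a free-form answer instead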
k_test: 128
inference: 'rank'
# optimizer
weight_decay: 0.05
min_lr: 0
max_epoch: 10
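# usage sketch (assumption: the training script loads this file with PyYAML or
# ruamel.yaml), e.g.:
#   config = yaml.load(open('configs/vqa.yaml', 'r'), Loader=yaml.Loader)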