vqa.yaml

vqa_root: '/export/share/datasets/vision/VQA/Images/mscoco/' # followed by train2014/
vg_root: '/export/share/datasets/vision/visual-genome/' # followed by image/
train_files: ['vqa_train','vqa_val','vg_qa']
ann_root: 'annotation'

# set pretrained as a file path or a URL
pretrained: 'https://storage.googleapis.com/sfr-vision-language-research/BLIP/models/model_base_vqa_capfilt_large.pth'

# size of ViT model; base or large
vit: 'base'
batch_size_train: 16
batch_size_test: 32
vit_grad_ckpt: False
vit_ckpt_layer: 0
init_lr: 2e-5
image_size: 480
k_test: 128
inference: 'rank'

# optimizer
weight_decay: 0.05
min_lr: 0
max_epoch: 10
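
A minimal sketch of reading this config in Python, assuming PyYAML is available; the relative path 'configs/vqa.yaml' and this loading snippet are illustrative and not necessarily how the repository's own scripts load it.

# Load the VQA fine-tuning config and pull out a few fields.
import yaml

with open('configs/vqa.yaml') as f:  # path is an assumption
    config = yaml.safe_load(f)

print(config['pretrained'])                  # checkpoint URL or local path
print(config['vit'], config['image_size'])   # 'base', 480
# Some YAML 1.1 loaders read '2e-5' as a string rather than a float,
# so cast defensively before handing it to an optimizer.
lr = float(config['init_lr'])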