---
# retrieval_flickr.yaml
# Settings for the 'flickr' retrieval dataset. All keys live in one flat,
# top-level mapping.

# Dataset location.
image_root: '/export/share/datasets/vision/flickr30k/'
ann_root: 'annotation'
dataset: 'flickr'

# Set pretrained as a file path or a URL.
pretrained: 'https://storage.googleapis.com/sfr-vision-language-research/BLIP/models/model_base_retrieval_flickr.pth'

# Size of the ViT model: 'base' or 'large'.
# The active block below is the 'base' variant.
vit: 'base'
batch_size_train: 32
batch_size_test: 64
# NOTE(review): presumably toggles gradient checkpointing in the ViT and the
# layer it applies from -- confirm against the code that reads these keys.
vit_grad_ckpt: true
vit_ckpt_layer: 4
# Written with an explicit mantissa dot: YAML 1.1 loaders (e.g. PyYAML) read
# a bare `1e-5` as the string '1e-5' instead of a float.
init_lr: 1.0e-5

# Alternative values for the 'large' variant. To use them, comment out the
# six 'base' keys above and uncomment these (having both active would create
# duplicate keys).
# vit: 'large'
# batch_size_train: 16
# batch_size_test: 32
# vit_grad_ckpt: true
# vit_ckpt_layer: 10
# init_lr: 5.0e-6

# NOTE(review): the meaning of queue_size, alpha, k_test and
# negative_all_rank is defined by the consuming code, which is not visible
# from this file.
image_size: 384
queue_size: 57600
alpha: 0.4
k_test: 128
negative_all_rank: false

# Optimizer
weight_decay: 0.05
min_lr: 0
max_epoch: 6